summary | refs | log | tree | commit | diff | stats
path: root/tools/perf/util
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- tools/perf/util/Build 274
-rw-r--r-- tools/perf/util/annotate.c 71
-rw-r--r-- tools/perf/util/annotate.h 19
-rw-r--r-- tools/perf/util/auxtrace.c 55
-rw-r--r-- tools/perf/util/auxtrace.h 10
-rw-r--r-- tools/perf/util/block-range.c 2
-rw-r--r-- tools/perf/util/block-range.h 6
-rw-r--r-- tools/perf/util/bpf-event.c 263
-rw-r--r-- tools/perf/util/bpf-event.h 38
-rw-r--r-- tools/perf/util/bpf-loader.c 35
-rw-r--r-- tools/perf/util/bpf-loader.h 7
-rw-r--r-- tools/perf/util/bpf_map.c 72
-rw-r--r-- tools/perf/util/bpf_map.h 22
-rw-r--r-- tools/perf/util/branch.h 27
-rw-r--r-- tools/perf/util/build-id.c 14
-rw-r--r-- tools/perf/util/build-id.h 3
-rw-r--r-- tools/perf/util/c++/Build 4
-rw-r--r-- tools/perf/util/c++/clang.cpp 2
-rw-r--r-- tools/perf/util/callchain.c 49
-rw-r--r-- tools/perf/util/callchain.h 22
-rw-r--r-- tools/perf/util/color.c 39
-rw-r--r-- tools/perf/util/color.h 1
-rw-r--r-- tools/perf/util/color_config.c 47
-rw-r--r-- tools/perf/util/comm.c 1
-rw-r--r-- tools/perf/util/comm.h 4
-rw-r--r-- tools/perf/util/config.c 9
-rw-r--r-- tools/perf/util/cpu-set-sched.h 50
-rw-r--r-- tools/perf/util/cpumap.c 23
-rw-r--r-- tools/perf/util/cpumap.h 1
-rw-r--r-- tools/perf/util/cputopo.c 277
-rw-r--r-- tools/perf/util/cputopo.h 33
-rw-r--r-- tools/perf/util/cs-etm-decoder/Build 2
-rw-r--r-- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c 135
-rw-r--r-- tools/perf/util/cs-etm-decoder/cs-etm-decoder.h 43
-rw-r--r-- tools/perf/util/cs-etm.c 978
-rw-r--r-- tools/perf/util/cs-etm.h 57
-rw-r--r-- tools/perf/util/data-convert-bt.c 4
-rw-r--r-- tools/perf/util/data.c 175
-rw-r--r-- tools/perf/util/data.h 16
-rw-r--r-- tools/perf/util/db-export.c 1
-rw-r--r-- tools/perf/util/drv_configs.c 78
-rw-r--r-- tools/perf/util/drv_configs.h 26
-rw-r--r-- tools/perf/util/dso.c 19
-rw-r--r-- tools/perf/util/dso.h 18
-rw-r--r-- tools/perf/util/dump-insn.c 8
-rw-r--r-- tools/perf/util/dump-insn.h 2
-rw-r--r-- tools/perf/util/env.c 2
-rw-r--r-- tools/perf/util/env.h 1
-rw-r--r-- tools/perf/util/event.c 105
-rw-r--r-- tools/perf/util/event.h 68
-rw-r--r-- tools/perf/util/evlist.c 49
-rw-r--r-- tools/perf/util/evlist.h 13
-rw-r--r-- tools/perf/util/evsel.c 57
-rw-r--r-- tools/perf/util/evsel.h 13
-rw-r--r-- tools/perf/util/evsel_fprintf.c 1
-rw-r--r-- tools/perf/util/find-map.c (renamed from tools/perf/util/find-vdso-map.c) 7
-rw-r--r-- tools/perf/util/genelf.h 6
-rw-r--r-- tools/perf/util/get_current_dir_name.c 18
-rw-r--r-- tools/perf/util/header.c 363
-rw-r--r-- tools/perf/util/header.h 1
-rw-r--r-- tools/perf/util/hist.c 222
-rw-r--r-- tools/perf/util/hist.h 20
-rw-r--r-- tools/perf/util/include/asm/uaccess.h 2
-rw-r--r-- tools/perf/util/intel-bts.c 30
-rw-r--r-- tools/perf/util/intel-pt-decoder/Build 2
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c 43
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c 8
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-log.c 5
-rw-r--r-- tools/perf/util/intel-pt-decoder/intel-pt-log.h 1
-rw-r--r-- tools/perf/util/intel-pt.c 76
-rw-r--r-- tools/perf/util/intlist.h 2
-rw-r--r-- tools/perf/util/jitdump.c 3
-rw-r--r-- tools/perf/util/kvm-stat.h 7
-rw-r--r-- tools/perf/util/machine.c 201
-rw-r--r-- tools/perf/util/machine.h 23
-rw-r--r-- tools/perf/util/map.c 103
-rw-r--r-- tools/perf/util/map.h 92
-rw-r--r-- tools/perf/util/map_groups.h 91
-rw-r--r-- tools/perf/util/map_symbol.h 22
-rw-r--r-- tools/perf/util/mem-events.c 2
-rw-r--r-- tools/perf/util/metricgroup.c 10
-rw-r--r-- tools/perf/util/metricgroup.h 3
-rw-r--r-- tools/perf/util/mmap.c 249
-rw-r--r-- tools/perf/util/mmap.h 27
-rw-r--r-- tools/perf/util/namespaces.c 17
-rw-r--r-- tools/perf/util/namespaces.h 1
-rw-r--r-- tools/perf/util/ordered-events.c 50
-rw-r--r-- tools/perf/util/ordered-events.h 8
-rw-r--r-- tools/perf/util/parse-events.c 12
-rw-r--r-- tools/perf/util/parse-events.h 1
-rw-r--r-- tools/perf/util/parse-events.l 1
-rw-r--r-- tools/perf/util/parse-events.y 4
-rw-r--r-- tools/perf/util/pmu.c 51
-rw-r--r-- tools/perf/util/pmu.h 5
-rw-r--r-- tools/perf/util/probe-event.c 49
-rw-r--r-- tools/perf/util/probe-event.h 6
-rw-r--r-- tools/perf/util/probe-file.c 37
-rw-r--r-- tools/perf/util/probe-file.h 1
-rw-r--r-- tools/perf/util/python.c 7
-rw-r--r-- tools/perf/util/rb_resort.h 8
-rw-r--r-- tools/perf/util/rblist.c 28
-rw-r--r-- tools/perf/util/rblist.h 2
-rw-r--r-- tools/perf/util/s390-cpumcf-kernel.h 62
-rw-r--r-- tools/perf/util/s390-cpumsf.c 91
-rw-r--r-- tools/perf/util/s390-sample-raw.c 222
-rw-r--r-- tools/perf/util/sample-raw.c 18
-rw-r--r-- tools/perf/util/sample-raw.h 14
-rw-r--r-- tools/perf/util/scripting-engines/Build 4
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-perl.c 8
-rw-r--r-- tools/perf/util/scripting-engines/trace-event-python.c 41
-rw-r--r-- tools/perf/util/session.c 124
-rw-r--r-- tools/perf/util/setup.py 9
-rw-r--r-- tools/perf/util/sort.c 67
-rw-r--r-- tools/perf/util/sort.h 9
-rw-r--r-- tools/perf/util/srccode.c 186
-rw-r--r-- tools/perf/util/srccode.h 20
-rw-r--r-- tools/perf/util/srcline.c 73
-rw-r--r-- tools/perf/util/srcline.h 14
-rw-r--r-- tools/perf/util/stat-display.c 17
-rw-r--r-- tools/perf/util/stat-shadow.c 8
-rw-r--r-- tools/perf/util/strbuf.c 1
-rw-r--r-- tools/perf/util/strlist.h 2
-rw-r--r-- tools/perf/util/svghelper.c 2
-rw-r--r-- tools/perf/util/symbol-elf.c 83
-rw-r--r-- tools/perf/util/symbol-minimal.c 1
-rw-r--r-- tools/perf/util/symbol.c 117
-rw-r--r-- tools/perf/util/symbol.h 109
-rw-r--r-- tools/perf/util/symbol_conf.h 73
-rw-r--r-- tools/perf/util/symbol_fprintf.c 3
-rw-r--r-- tools/perf/util/thread-stack.c 496
-rw-r--r-- tools/perf/util/thread-stack.h 13
-rw-r--r-- tools/perf/util/thread.c 16
-rw-r--r-- tools/perf/util/thread.h 16
-rw-r--r-- tools/perf/util/tool.h 5
-rw-r--r-- tools/perf/util/top.c 8
-rw-r--r-- tools/perf/util/top.h 10
-rw-r--r-- tools/perf/util/trace-event-parse.c 16
-rw-r--r-- tools/perf/util/trace-event-read.c 4
-rw-r--r-- tools/perf/util/trace-event.c 8
-rw-r--r-- tools/perf/util/trace-event.h 16
-rw-r--r-- tools/perf/util/unwind-libdw.c 6
-rw-r--r-- tools/perf/util/unwind-libunwind-local.c 1
-rw-r--r-- tools/perf/util/unwind-libunwind.c 1
-rw-r--r-- tools/perf/util/util.c 82
-rw-r--r-- tools/perf/util/util.h 7
-rw-r--r-- tools/perf/util/vdso.c 7
-rw-r--r-- tools/perf/util/zlib.c 1
147 files changed, 5560 insertions, 1809 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index ecd9f9ceda77..8dd3102301ea 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,156 +1,164 @@
-libperf-y += annotate.o
-libperf-y += block-range.o
-libperf-y += build-id.o
-libperf-y += config.o
-libperf-y += ctype.o
-libperf-y += db-export.o
-libperf-y += env.o
-libperf-y += event.o
-libperf-y += evlist.o
-libperf-y += evsel.o
-libperf-y += evsel_fprintf.o
-libperf-y += find_bit.o
-libperf-y += kallsyms.o
-libperf-y += levenshtein.o
-libperf-y += llvm-utils.o
-libperf-y += mmap.o
-libperf-y += memswap.o
-libperf-y += parse-events.o
-libperf-y += perf_regs.o
-libperf-y += path.o
-libperf-y += print_binary.o
-libperf-y += rbtree.o
-libperf-y += libstring.o
-libperf-y += bitmap.o
-libperf-y += hweight.o
-libperf-y += smt.o
-libperf-y += strbuf.o
-libperf-y += string.o
-libperf-y += strlist.o
-libperf-y += strfilter.o
-libperf-y += top.o
-libperf-y += usage.o
-libperf-y += dso.o
-libperf-y += symbol.o
-libperf-y += symbol_fprintf.o
-libperf-y += color.o
-libperf-y += metricgroup.o
-libperf-y += header.o
-libperf-y += callchain.o
-libperf-y += values.o
-libperf-y += debug.o
-libperf-y += machine.o
-libperf-y += map.o
-libperf-y += pstack.o
-libperf-y += session.o
-libperf-$(CONFIG_TRACE) += syscalltbl.o
-libperf-y += ordered-events.o
-libperf-y += namespaces.o
-libperf-y += comm.o
-libperf-y += thread.o
-libperf-y += thread_map.o
-libperf-y += trace-event-parse.o
-libperf-y += parse-events-flex.o
-libperf-y += parse-events-bison.o
-libperf-y += pmu.o
-libperf-y += pmu-flex.o
-libperf-y += pmu-bison.o
-libperf-y += trace-event-read.o
-libperf-y += trace-event-info.o
-libperf-y += trace-event-scripting.o
-libperf-y += trace-event.o
-libperf-y += svghelper.o
-libperf-y += sort.o
-libperf-y += hist.o
-libperf-y += util.o
-libperf-y += xyarray.o
-libperf-y += cpumap.o
-libperf-y += cgroup.o
-libperf-y += target.o
-libperf-y += rblist.o
-libperf-y += intlist.o
-libperf-y += vdso.o
-libperf-y += counts.o
-libperf-y += stat.o
-libperf-y += stat-shadow.o
-libperf-y += stat-display.o
-libperf-y += record.o
-libperf-y += srcline.o
-libperf-y += data.o
-libperf-y += tsc.o
-libperf-y += cloexec.o
-libperf-y += call-path.o
-libperf-y += rwsem.o
-libperf-y += thread-stack.o
-libperf-$(CONFIG_AUXTRACE) += auxtrace.o
-libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
-libperf-$(CONFIG_AUXTRACE) += intel-pt.o
-libperf-$(CONFIG_AUXTRACE) += intel-bts.o
-libperf-$(CONFIG_AUXTRACE) += arm-spe.o
-libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
-libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
+perf-y += annotate.o
+perf-y += block-range.o
+perf-y += build-id.o
+perf-y += config.o
+perf-y += ctype.o
+perf-y += db-export.o
+perf-y += env.o
+perf-y += event.o
+perf-y += evlist.o
+perf-y += evsel.o
+perf-y += evsel_fprintf.o
+perf-y += find_bit.o
+perf-y += get_current_dir_name.o
+perf-y += kallsyms.o
+perf-y += levenshtein.o
+perf-y += llvm-utils.o
+perf-y += mmap.o
+perf-y += memswap.o
+perf-y += parse-events.o
+perf-y += perf_regs.o
+perf-y += path.o
+perf-y += print_binary.o
+perf-y += rbtree.o
+perf-y += libstring.o
+perf-y += bitmap.o
+perf-y += hweight.o
+perf-y += smt.o
+perf-y += strbuf.o
+perf-y += string.o
+perf-y += strlist.o
+perf-y += strfilter.o
+perf-y += top.o
+perf-y += usage.o
+perf-y += dso.o
+perf-y += symbol.o
+perf-y += symbol_fprintf.o
+perf-y += color.o
+perf-y += color_config.o
+perf-y += metricgroup.o
+perf-y += header.o
+perf-y += callchain.o
+perf-y += values.o
+perf-y += debug.o
+perf-y += machine.o
+perf-y += map.o
+perf-y += pstack.o
+perf-y += session.o
+perf-y += sample-raw.o
+perf-y += s390-sample-raw.o
+perf-$(CONFIG_TRACE) += syscalltbl.o
+perf-y += ordered-events.o
+perf-y += namespaces.o
+perf-y += comm.o
+perf-y += thread.o
+perf-y += thread_map.o
+perf-y += trace-event-parse.o
+perf-y += parse-events-flex.o
+perf-y += parse-events-bison.o
+perf-y += pmu.o
+perf-y += pmu-flex.o
+perf-y += pmu-bison.o
+perf-y += trace-event-read.o
+perf-y += trace-event-info.o
+perf-y += trace-event-scripting.o
+perf-y += trace-event.o
+perf-y += svghelper.o
+perf-y += sort.o
+perf-y += hist.o
+perf-y += util.o
+perf-y += xyarray.o
+perf-y += cpumap.o
+perf-y += cputopo.o
+perf-y += cgroup.o
+perf-y += target.o
+perf-y += rblist.o
+perf-y += intlist.o
+perf-y += vdso.o
+perf-y += counts.o
+perf-y += stat.o
+perf-y += stat-shadow.o
+perf-y += stat-display.o
+perf-y += record.o
+perf-y += srcline.o
+perf-y += srccode.o
+perf-y += data.o
+perf-y += tsc.o
+perf-y += cloexec.o
+perf-y += call-path.o
+perf-y += rwsem.o
+perf-y += thread-stack.o
+perf-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+perf-$(CONFIG_AUXTRACE) += intel-pt.o
+perf-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-$(CONFIG_AUXTRACE) += arm-spe.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
ifdef CONFIG_LIBOPENCSD
-libperf-$(CONFIG_AUXTRACE) += cs-etm.o
-libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+perf-$(CONFIG_AUXTRACE) += cs-etm.o
+perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
endif
-libperf-y += parse-branch-options.o
-libperf-y += dump-insn.o
-libperf-y += parse-regs-options.o
-libperf-y += term.o
-libperf-y += help-unknown-cmd.o
-libperf-y += mem-events.o
-libperf-y += vsprintf.o
-libperf-y += drv_configs.o
-libperf-y += units.o
-libperf-y += time-utils.o
-libperf-y += expr-bison.o
-libperf-y += branch.o
-libperf-y += mem2node.o
-
-libperf-$(CONFIG_LIBBPF) += bpf-loader.o
-libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
-libperf-$(CONFIG_LIBELF) += symbol-elf.o
-libperf-$(CONFIG_LIBELF) += probe-file.o
-libperf-$(CONFIG_LIBELF) += probe-event.o
+perf-y += parse-branch-options.o
+perf-y += dump-insn.o
+perf-y += parse-regs-options.o
+perf-y += term.o
+perf-y += help-unknown-cmd.o
+perf-y += mem-events.o
+perf-y += vsprintf.o
+perf-y += units.o
+perf-y += time-utils.o
+perf-y += expr-bison.o
+perf-y += branch.o
+perf-y += mem2node.o
+
+perf-$(CONFIG_LIBBPF) += bpf-loader.o
+perf-$(CONFIG_LIBBPF) += bpf_map.o
+perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
+perf-$(CONFIG_LIBELF) += symbol-elf.o
+perf-$(CONFIG_LIBELF) += probe-file.o
+perf-$(CONFIG_LIBELF) += probe-event.o
ifndef CONFIG_LIBELF
-libperf-y += symbol-minimal.o
+perf-y += symbol-minimal.o
endif
ifndef CONFIG_SETNS
-libperf-y += setns.o
+perf-y += setns.o
endif
-libperf-$(CONFIG_DWARF) += probe-finder.o
-libperf-$(CONFIG_DWARF) += dwarf-aux.o
-libperf-$(CONFIG_DWARF) += dwarf-regs.o
+perf-$(CONFIG_DWARF) += probe-finder.o
+perf-$(CONFIG_DWARF) += dwarf-aux.o
+perf-$(CONFIG_DWARF) += dwarf-regs.o
-libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
-libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
-libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
-libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
+perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
+perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
+perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
-libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
-libperf-y += scripting-engines/
+perf-y += scripting-engines/
-libperf-$(CONFIG_ZLIB) += zlib.o
-libperf-$(CONFIG_LZMA) += lzma.o
-libperf-y += demangle-java.o
-libperf-y += demangle-rust.o
+perf-$(CONFIG_ZLIB) += zlib.o
+perf-$(CONFIG_LZMA) += lzma.o
+perf-y += demangle-java.o
+perf-y += demangle-rust.o
ifdef CONFIG_JITDUMP
-libperf-$(CONFIG_LIBELF) += jitdump.o
-libperf-$(CONFIG_LIBELF) += genelf.o
-libperf-$(CONFIG_DWARF) += genelf_debug.o
+perf-$(CONFIG_LIBELF) += jitdump.o
+perf-$(CONFIG_LIBELF) += genelf.o
+perf-$(CONFIG_DWARF) += genelf_debug.o
endif
-libperf-y += perf-hooks.o
+perf-y += perf-hooks.o
-libperf-$(CONFIG_CXX) += c++/
+perf-$(CONFIG_LIBBPF) += bpf-event.o
+
+perf-$(CONFIG_CXX) += c++/
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 28cd6a17491b..11a8a447a3af 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -9,6 +9,7 @@
#include <errno.h>
#include <inttypes.h>
+#include <libgen.h>
#include "util.h"
#include "ui/ui.h"
#include "sort.h"
@@ -16,6 +17,7 @@
#include "color.h"
#include "config.h"
#include "cache.h"
+#include "map.h"
#include "symbol.h"
#include "units.h"
#include "debug.h"
@@ -134,14 +136,20 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
return 0;
}
+#include "arch/arc/annotate/instructions.c"
#include "arch/arm/annotate/instructions.c"
#include "arch/arm64/annotate/instructions.c"
#include "arch/x86/annotate/instructions.c"
#include "arch/powerpc/annotate/instructions.c"
#include "arch/s390/annotate/instructions.c"
+#include "arch/sparc/annotate/instructions.c"
static struct arch architectures[] = {
{
+ .name = "arc",
+ .init = arc__annotate_init,
+ },
+ {
.name = "arm",
.init = arm__annotate_init,
},
@@ -170,6 +178,13 @@ static struct arch architectures[] = {
.comment_char = '#',
},
},
+ {
+ .name = "sparc",
+ .init = sparc__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
};
static void ins__delete(struct ins_operands *ops)
@@ -992,6 +1007,7 @@ static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64
static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
{
unsigned n_insn;
+ unsigned int cover_insn = 0;
u64 offset;
n_insn = annotation__count_insn(notes, start, end);
@@ -1005,21 +1021,34 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
for (offset = start; offset <= end; offset++) {
struct annotation_line *al = notes->offsets[offset];
- if (al)
+ if (al && al->ipc == 0.0) {
al->ipc = ipc;
+ cover_insn++;
+ }
+ }
+
+ if (cover_insn) {
+ notes->hit_cycles += ch->cycles;
+ notes->hit_insn += n_insn * ch->num;
+ notes->cover_insn += cover_insn;
}
}
}
void annotation__compute_ipc(struct annotation *notes, size_t size)
{
- u64 offset;
+ s64 offset;
if (!notes->src || !notes->src->cycles_hist)
return;
+ notes->total_insn = annotation__count_insn(notes, 0, size - 1);
+ notes->hit_cycles = 0;
+ notes->hit_insn = 0;
+ notes->cover_insn = 0;
+
pthread_mutex_lock(&notes->lock);
- for (offset = 0; offset < size; ++offset) {
+ for (offset = size - 1; offset >= 0; --offset) {
struct cyc_hist *ch;
ch = &notes->src->cycles_hist[offset];
@@ -1696,15 +1725,14 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
err = asprintf(&command,
"%s %s%s --start-address=0x%016" PRIx64
" --stop-address=0x%016" PRIx64
- " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
+ " -l -d %s %s -C \"$1\" 2>/dev/null|grep -v \"$1:\"|expand",
opts->objdump_path ?: "objdump",
opts->disassembler_style ? "-M " : "",
opts->disassembler_style ?: "",
map__rip_2objdump(map, sym->start),
map__rip_2objdump(map, sym->end),
opts->show_asm_raw ? "" : "--no-show-raw",
- opts->annotate_src ? "-S" : "",
- symfs_filename, symfs_filename);
+ opts->annotate_src ? "-S" : "");
if (err < 0) {
pr_err("Failure allocating memory for the command to run\n");
@@ -1729,7 +1757,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
close(stdout_fd[0]);
dup2(stdout_fd[1], 1);
close(stdout_fd[1]);
- execl("/bin/sh", "sh", "-c", command, NULL);
+ execl("/bin/sh", "sh", "-c", command, "--", symfs_filename,
+ NULL);
perror(command);
exit(-1);
}
@@ -1750,7 +1779,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
while (!feof(file)) {
/*
* The source code line number (lineno) needs to be kept in
- * accross calls to symbol__parse_objdump_line(), so that it
+ * across calls to symbol__parse_objdump_line(), so that it
* can associate it with the instructions till the next one.
* See disasm_line__new() and struct disasm_line::line_nr.
*/
@@ -1862,6 +1891,7 @@ int symbol__annotate(struct symbol *sym, struct map *map,
struct annotation_options *options,
struct arch **parch)
{
+ struct annotation *notes = symbol__annotation(sym);
struct annotate_args args = {
.privsize = privsize,
.evsel = evsel,
@@ -1892,6 +1922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map,
args.ms.map = map;
args.ms.sym = sym;
+ notes->start = map__rip_2objdump(map, sym->start);
return symbol__disassemble(sym, &args);
}
@@ -2555,6 +2586,22 @@ call_like:
disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset);
}
+static void ipc_coverage_string(char *bf, int size, struct annotation *notes)
+{
+ double ipc = 0.0, coverage = 0.0;
+ /*
+ * Format the symbol-wide IPC summary of 'notes' into bf; 'size' is
+ * handed to scnprintf() as the buffer bound.
+ *
+ * Average IPC is hit_insn divided by hit_cycles. It stays 0.0 when
+ * hit_cycles is zero, which also avoids a division by zero.
+ */
+ if (notes->hit_cycles)
+ ipc = notes->hit_insn / ((double)notes->hit_cycles);
+ /*
+ * Coverage is cover_insn expressed as a percentage of total_insn;
+ * the guard keeps it at 0.0 when total_insn is zero.
+ */
+ if (notes->total_insn) {
+ coverage = notes->cover_insn * 100.0 /
+ ((double)notes->total_insn);
+ }
+ /* "%%" in the format emits a literal percent sign after the coverage. */
+ scnprintf(bf, size, "(Average IPC: %.2f, IPC Coverage: %.1f%%)",
+ ipc, coverage);
+}
+
static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
bool first_line, bool current_entry, bool change_color, int width,
void *obj, unsigned int percent_type,
@@ -2650,6 +2697,11 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati
ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
"Cycle(min/max)");
}
+
+ if (show_title && !*al->line) {
+ ipc_coverage_string(bf, sizeof(bf), notes);
+ obj__printf(obj, "%*s", ANNOTATION__AVG_IPC_WIDTH, bf);
+ }
}
obj__printf(obj, " ");
@@ -2746,8 +2798,6 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev
symbol__calc_percent(sym, evsel);
- notes->start = map__rip_2objdump(map, sym->start);
-
annotation__set_offsets(notes, size);
annotation__mark_jump_targets(notes, sym);
annotation__compute_ipc(notes, size);
@@ -2755,6 +2805,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev
notes->nr_events = nr_pcnt;
annotation__update_column_widths(notes);
+ sym->annotate2 = true;
return 0;
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 5399ba2321bb..95053cab41fe 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -4,16 +4,24 @@
#include <stdbool.h>
#include <stdint.h>
+#include <stdio.h>
#include <linux/types.h>
-#include "symbol.h"
-#include "hist.h"
-#include "sort.h"
#include <linux/list.h>
#include <linux/rbtree.h>
#include <pthread.h>
#include <asm/bug.h>
+#include "symbol_conf.h"
+struct hist_browser_timer;
+struct hist_entry;
struct ins_ops;
+struct map;
+struct map_symbol;
+struct addr_map_symbol;
+struct option;
+struct perf_sample;
+struct perf_evsel;
+struct symbol;
struct ins {
const char *name;
@@ -64,6 +72,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
#define ANNOTATION__IPC_WIDTH 6
#define ANNOTATION__CYCLES_WIDTH 6
#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
+#define ANNOTATION__AVG_IPC_WIDTH 36
struct annotation_options {
bool hide_src_code,
@@ -262,6 +271,10 @@ struct annotation {
pthread_mutex_t lock;
u64 max_coverage;
u64 start;
+ u64 hit_cycles;
+ u64 hit_insn;
+ unsigned int total_insn;
+ unsigned int cover_insn;
struct annotation_options *options;
struct annotation_line **offsets;
int nr_events;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c4617bcfd521..267e54df511b 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -27,6 +27,7 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/string.h>
+#include <linux/time64.h>
#include <sys/param.h>
#include <stdlib.h>
@@ -41,6 +42,7 @@
#include "pmu.h"
#include "evsel.h"
#include "cpumap.h"
+#include "symbol.h"
#include "thread_map.h"
#include "asm/bug.h"
#include "auxtrace.h"
@@ -857,7 +859,7 @@ void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
int code, int cpu, pid_t pid, pid_t tid, u64 ip,
- const char *msg)
+ const char *msg, u64 timestamp)
{
size_t size;
@@ -869,7 +871,9 @@ void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
auxtrace_error->cpu = cpu;
auxtrace_error->pid = pid;
auxtrace_error->tid = tid;
+ auxtrace_error->fmt = 1;
auxtrace_error->ip = ip;
+ auxtrace_error->time = timestamp;
strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
@@ -962,16 +966,23 @@ s64 perf_event__process_auxtrace(struct perf_session *session,
#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64
#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+ bool no_sample)
{
- synth_opts->instructions = true;
synth_opts->branches = true;
synth_opts->transactions = true;
synth_opts->ptwrites = true;
synth_opts->pwr_events = true;
synth_opts->errors = true;
- synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
- synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ if (no_sample) {
+ synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ synth_opts->period = 1;
+ synth_opts->calls = true;
+ } else {
+ synth_opts->instructions = true;
+ synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ }
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
synth_opts->initial_skip = 0;
@@ -999,7 +1010,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
}
if (!str) {
- itrace_synth_opts__set_default(synth_opts);
+ itrace_synth_opts__set_default(synth_opts, false);
return 0;
}
@@ -1152,12 +1163,27 @@ static const char *auxtrace_error_name(int type)
size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
{
struct auxtrace_error_event *e = &event->auxtrace_error;
+ unsigned long long nsecs = e->time;
+ const char *msg = e->msg;
int ret;
ret = fprintf(fp, " %s error type %u",
auxtrace_error_name(e->type), e->type);
+
+ if (e->fmt && nsecs) {
+ unsigned long secs = nsecs / NSEC_PER_SEC;
+
+ nsecs -= secs * NSEC_PER_SEC;
+ ret += fprintf(fp, " time %lu.%09llu", secs, nsecs);
+ } else {
+ ret += fprintf(fp, " time 0");
+ }
+
+ if (!e->fmt)
+ msg = (const char *)&e->time;
+
ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
- e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
+ e->cpu, e->pid, e->tid, e->ip, e->code, msg);
return ret;
}
@@ -1271,9 +1297,9 @@ static int __auxtrace_mmap__read(struct perf_mmap *map,
}
/* padding must be written by fn() e.g. record__process_auxtrace() */
- padding = size & 7;
+ padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1);
if (padding)
- padding = 8 - padding;
+ padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding;
memset(&ev, 0, sizeof(ev));
ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
@@ -1976,17 +2002,14 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
{
- struct symbol *first_sym = dso__first_symbol(dso);
- struct symbol *last_sym = dso__last_symbol(dso);
-
- if (!first_sym || !last_sym) {
- pr_err("Failed to determine filter for %s\nNo symbols found.\n",
+ if (dso__data_file_size(dso, NULL)) {
+ pr_err("Failed to determine filter for %s\nCannot determine file size.\n",
filt->filename);
return -EINVAL;
}
- filt->addr = first_sym->start;
- filt->size = last_sym->end - first_sym->start;
+ filt->addr = 0;
+ filt->size = dso->data.file_size;
return 0;
}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index d88f6e9eb461..c69bcd9a3091 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -40,6 +40,9 @@ struct record_opts;
struct auxtrace_info_event;
struct events_stats;
+/* Auxtrace records must have the same alignment as perf event records */
+#define PERF_AUXTRACE_RECORD_ALIGNMENT 8
+
enum auxtrace_type {
PERF_AUXTRACE_UNKNOWN,
PERF_AUXTRACE_INTEL_PT,
@@ -58,6 +61,7 @@ enum itrace_period_type {
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
* @set: indicates whether or not options have been set
+ * @default_no_sample: Default to no sampling.
* @inject: indicates the event (not just the sample) must be fully synthesized
* because 'perf inject' will write it out
* @instructions: whether to synthesize 'instructions' events
@@ -82,6 +86,7 @@ enum itrace_period_type {
*/
struct itrace_synth_opts {
bool set;
+ bool default_no_sample;
bool inject;
bool instructions;
bool branches;
@@ -514,7 +519,7 @@ void auxtrace_index__free(struct list_head *head);
void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
int code, int cpu, pid_t pid, pid_t tid, u64 ip,
- const char *msg);
+ const char *msg, u64 timestamp);
int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
struct perf_tool *tool,
@@ -528,7 +533,8 @@ int perf_event__process_auxtrace_error(struct perf_session *session,
union perf_event *event);
int itrace_parse_synth_opts(const struct option *opt, const char *str,
int unset);
-void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
+ bool no_sample);
size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
void perf_session__auxtrace_error_inc(struct perf_session *session,
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
index f1451c987eec..1be432657501 100644
--- a/tools/perf/util/block-range.c
+++ b/tools/perf/util/block-range.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "block-range.h"
#include "annotate.h"
+#include <assert.h>
+#include <stdlib.h>
struct {
struct rb_root root;
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
index a5ba719d69fb..ec0fb534bf56 100644
--- a/tools/perf/util/block-range.h
+++ b/tools/perf/util/block-range.h
@@ -2,7 +2,11 @@
#ifndef __PERF_BLOCK_RANGE_H
#define __PERF_BLOCK_RANGE_H
-#include "symbol.h"
+#include <stdbool.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct symbol;
/*
* struct block_range - non-overlapping parts of basic blocks
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
new file mode 100644
index 000000000000..028c8ec1f62a
--- /dev/null
+++ b/tools/perf/util/bpf-event.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <linux/btf.h>
+#include "bpf-event.h"
+#include "debug.h"
+#include "symbol.h"
+#include "machine.h"
+
+#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+
+/*
+ * Append the bytes of @data (@len of them) to @buf as two-digit lowercase
+ * hex, never handing snprintf() more than the space left out of @size.
+ * Returns the sum of the snprintf() return values.
+ */
+static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
+{
+	const unsigned char *byte = data;
+	const unsigned char *end = data + len;
+	int written = 0;
+
+	while (byte < end) {
+		written += snprintf(buf + written, size - written, "%02x", *byte);
+		byte++;
+	}
+
+	return written;
+}
+
+/*
+ * Handle a PERF_RECORD_BPF_EVENT: nothing is recorded in @machine here,
+ * the event is only dumped to stdout when dump_trace is set.
+ * Always returns 0.
+ */
+int machine__process_bpf_event(struct machine *machine __maybe_unused,
+			       union perf_event *event,
+			       struct perf_sample *sample __maybe_unused)
+{
+	if (!dump_trace)
+		return 0;
+
+	perf_event__fprintf_bpf_event(event, stdout);
+	return 0;
+}
+
+/*
+ * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
+ * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
+ * one PERF_RECORD_KSYMBOL is generated for each sub program.
+ *
+ * @fd is the file descriptor of the BPF program to query and @event is a
+ * caller-provided buffer that is overwritten for every synthesized record.
+ *
+ * Returns:
+ *    0 for success;
+ *   -1 for failures;
+ *   -2 for lack of kernel support.
+ */
+static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
+					       perf_event__handler_t process,
+					       struct machine *machine,
+					       int fd,
+					       union perf_event *event,
+					       struct record_opts *opts)
+{
+	struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
+	struct bpf_event *bpf_event = &event->bpf_event;
+	u32 sub_prog_cnt, i, func_info_rec_size = 0;
+	u8 (*prog_tags)[BPF_TAG_SIZE] = NULL;
+	struct bpf_prog_info info = { .type = 0, };
+	u32 info_len = sizeof(info);
+	void *func_infos = NULL;
+	u64 *prog_addrs = NULL;
+	struct btf *btf = NULL;
+	u32 *prog_lens = NULL;
+	bool has_btf = false;
+	char errbuf[512];
+	int err = 0;
+
+	/* Call bpf_obj_get_info_by_fd() to get sizes of arrays */
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+
+	if (err) {
+		pr_debug("%s: failed to get BPF program info: %s, aborting\n",
+			 __func__, str_error_r(errno, errbuf, sizeof(errbuf)));
+		return -1;
+	}
+	if (info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+		pr_debug("%s: the kernel is too old, aborting\n", __func__);
+		return -2;
+	}
+
+	/* number of ksyms, func_lengths, and tags should match */
+	sub_prog_cnt = info.nr_jited_ksyms;
+	if (sub_prog_cnt != info.nr_prog_tags ||
+	    sub_prog_cnt != info.nr_jited_func_lens)
+		return -1;
+
+	/* check BTF func info support */
+	if (info.btf_id && info.nr_func_info && info.func_info_rec_size) {
+		/* btf func info number should be same as sub_prog_cnt */
+		if (sub_prog_cnt != info.nr_func_info) {
+			pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
+			return -1;
+		}
+		if (btf__get_from_id(info.btf_id, &btf)) {
+			pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id);
+			return -1;
+		}
+		func_info_rec_size = info.func_info_rec_size;
+		func_infos = calloc(sub_prog_cnt, func_info_rec_size);
+		if (!func_infos) {
+			pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__);
+			/* btf is already held: unwind through "out" to release it */
+			err = -1;
+			goto out;
+		}
+		has_btf = true;
+	}
+
+	/*
+	 * We need address, length, and tag for each sub program.
+	 * Allocate memory and call bpf_obj_get_info_by_fd() again
+	 */
+	prog_addrs = calloc(sub_prog_cnt, sizeof(u64));
+	if (!prog_addrs) {
+		pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__);
+		/* err is still 0 from the first query: flag the failure */
+		err = -1;
+		goto out;
+	}
+	prog_lens = calloc(sub_prog_cnt, sizeof(u32));
+	if (!prog_lens) {
+		pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__);
+		err = -1;
+		goto out;
+	}
+	prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE);
+	if (!prog_tags) {
+		pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__);
+		err = -1;
+		goto out;
+	}
+
+	memset(&info, 0, sizeof(info));
+	info.nr_jited_ksyms = sub_prog_cnt;
+	info.nr_jited_func_lens = sub_prog_cnt;
+	info.nr_prog_tags = sub_prog_cnt;
+	info.jited_ksyms = ptr_to_u64(prog_addrs);
+	info.jited_func_lens = ptr_to_u64(prog_lens);
+	info.prog_tags = ptr_to_u64(prog_tags);
+	info_len = sizeof(info);
+	if (has_btf) {
+		info.nr_func_info = sub_prog_cnt;
+		info.func_info_rec_size = func_info_rec_size;
+		info.func_info = ptr_to_u64(func_infos);
+	}
+
+	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+	if (err) {
+		pr_debug("%s: failed to get BPF program info, aborting\n", __func__);
+		goto out;
+	}
+
+	/* Synthesize PERF_RECORD_KSYMBOL */
+	for (i = 0; i < sub_prog_cnt; i++) {
+		const struct bpf_func_info *finfo;
+		const char *short_name = NULL;
+		const struct btf_type *t;
+		int name_len;
+
+		*ksymbol_event = (struct ksymbol_event){
+			.header = {
+				.type = PERF_RECORD_KSYMBOL,
+				.size = offsetof(struct ksymbol_event, name),
+			},
+			.addr = prog_addrs[i],
+			.len = prog_lens[i],
+			.ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+			.flags = 0,
+		};
+		/* symbol name: "bpf_prog_<tag in hex>[_<short name>]" */
+		name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN,
+				    "bpf_prog_");
+		name_len += snprintf_hex(ksymbol_event->name + name_len,
+					 KSYM_NAME_LEN - name_len,
+					 prog_tags[i], BPF_TAG_SIZE);
+		if (has_btf) {
+			finfo = func_infos + i * info.func_info_rec_size;
+			t = btf__type_by_id(btf, finfo->type_id);
+			short_name = btf__name_by_offset(btf, t->name_off);
+		} else if (i == 0 && sub_prog_cnt == 1) {
+			/* no subprog */
+			if (info.name[0])
+				short_name = info.name;
+		} else
+			short_name = "F";
+		if (short_name)
+			name_len += snprintf(ksymbol_event->name + name_len,
+					     KSYM_NAME_LEN - name_len,
+					     "_%s", short_name);
+
+		ksymbol_event->header.size += PERF_ALIGN(name_len + 1,
+							 sizeof(u64));
+
+		/* zeroed id header area trails the record */
+		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+		event->header.size += machine->id_hdr_size;
+		err = perf_tool__process_synth_event(tool, event,
+						     machine, process);
+	}
+
+	/* Synthesize PERF_RECORD_BPF_EVENT */
+	if (opts->bpf_event) {
+		*bpf_event = (struct bpf_event){
+			.header = {
+				.type = PERF_RECORD_BPF_EVENT,
+				.size = sizeof(struct bpf_event),
+			},
+			.type = PERF_BPF_EVENT_PROG_LOAD,
+			.flags = 0,
+			.id = info.id,
+		};
+		/*
+		 * Use the program's own tag: after the loop above
+		 * i == sub_prog_cnt, so prog_tags[i] would read past the
+		 * end of the array.
+		 */
+		memcpy(bpf_event->tag, info.tag, BPF_TAG_SIZE);
+		memset((void *)event + event->header.size, 0, machine->id_hdr_size);
+		event->header.size += machine->id_hdr_size;
+		err = perf_tool__process_synth_event(tool, event,
+						     machine, process);
+	}
+
+out:
+	free(prog_tags);
+	free(prog_lens);
+	free(prog_addrs);
+	free(func_infos);
+	/* a struct btf must be released by libbpf, not by plain free() */
+	btf__free(btf);
+	return err ? -1 : 0;
+}
+
+/*
+ * Walk the BPF program ids reported by bpf_prog_get_next_id() and
+ * synthesize events for each program through
+ * perf_event__synthesize_one_bpf_prog().
+ *
+ * Programs whose fd cannot be obtained are skipped. An old kernel
+ * (EINVAL, or -2 from the per-program helper) and EPERM are not reported
+ * as errors.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+				      perf_event__handler_t process,
+				      struct machine *machine,
+				      struct record_opts *opts)
+{
+	union perf_event *event;
+	__u32 id = 0;
+	int err;
+	int fd;
+
+	/* one scratch buffer, reused for every synthesized record */
+	event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN + machine->id_hdr_size);
+	if (!event)
+		return -1;
+	while (true) {
+		err = bpf_prog_get_next_id(id, &id);
+		if (err) {
+			/* pr_debug() may clobber errno: capture it once */
+			int saved_errno = errno;
+
+			if (saved_errno == ENOENT) {
+				/* no more programs */
+				err = 0;
+				break;
+			}
+			pr_debug("%s: can't get next program: %s%s\n",
+				 __func__, strerror(saved_errno),
+				 saved_errno == EINVAL ? " -- kernel too old?" : "");
+			/* don't report error on old kernel or EPERM */
+			err = (saved_errno == EINVAL || saved_errno == EPERM) ? 0 : -1;
+			break;
+		}
+		fd = bpf_prog_get_fd_by_id(id);
+		if (fd < 0) {
+			pr_debug("%s: failed to get fd for prog_id %u\n",
+				 __func__, id);
+			continue;
+		}
+
+		err = perf_event__synthesize_one_bpf_prog(tool, process,
+							  machine, fd,
+							  event, opts);
+		close(fd);
+		if (err) {
+			/* do not return error for old kernel */
+			if (err == -2)
+				err = 0;
+			break;
+		}
+	}
+	free(event);
+	return err;
+}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
new file mode 100644
index 000000000000..7890067e1a37
--- /dev/null
+++ b/tools/perf/util/bpf-event.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BPF_EVENT_H
+#define __PERF_BPF_EVENT_H
+
+#include <linux/compiler.h>
+#include "event.h"
+
+struct machine;
+union perf_event;
+struct perf_sample;
+struct perf_tool;
+struct record_opts;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+/* With libbpf support, both functions are implemented in bpf-event.c. */
+int machine__process_bpf_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+
+int perf_event__synthesize_bpf_events(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ struct record_opts *opts);
+#else
+/* Without libbpf support: no-op stub that ignores its arguments and returns 0. */
+static inline int machine__process_bpf_event(struct machine *machine __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused)
+{
+ return 0;
+}
+
+/* Without libbpf support: no events are synthesized; always returns 0. */
+static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return 0;
+}
+#endif // HAVE_LIBBPF_SUPPORT
+#endif
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f9ae1a993806..251d9ea6252f 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -15,6 +15,7 @@
#include <errno.h>
#include "perf.h"
#include "debug.h"
+#include "evlist.h"
#include "bpf-loader.h"
#include "bpf-prologue.h"
#include "probe-event.h"
@@ -24,22 +25,12 @@
#include "llvm-utils.h"
#include "c++/clang-c.h"
-#define DEFINE_PRINT_FN(name, level) \
-static int libbpf_##name(const char *fmt, ...) \
-{ \
- va_list args; \
- int ret; \
- \
- va_start(args, fmt); \
- ret = veprintf(level, verbose, pr_fmt(fmt), args);\
- va_end(args); \
- return ret; \
+static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)),
+ const char *fmt, va_list args)
+{
+ return veprintf(1, verbose, pr_fmt(fmt), args);
}
-DEFINE_PRINT_FN(warning, 1)
-DEFINE_PRINT_FN(info, 1)
-DEFINE_PRINT_FN(debug, 1)
-
struct bpf_prog_priv {
bool is_tp;
char *sys_name;
@@ -59,9 +50,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
struct bpf_object *obj;
if (!libbpf_initialized) {
- libbpf_set_print(libbpf_warning,
- libbpf_info,
- libbpf_debug);
+ libbpf_set_print(libbpf_perf_print);
libbpf_initialized = true;
}
@@ -79,9 +68,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
struct bpf_object *obj;
if (!libbpf_initialized) {
- libbpf_set_print(libbpf_warning,
- libbpf_info,
- libbpf_debug);
+ libbpf_set_print(libbpf_perf_print);
libbpf_initialized = true;
}
@@ -99,7 +86,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
if (err)
return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
} else
- pr_debug("bpf: successfull builtin compilation\n");
+ pr_debug("bpf: successful builtin compilation\n");
obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
@@ -1503,7 +1490,7 @@ apply_obj_config_object(struct bpf_object *obj)
struct bpf_map *map;
int err;
- bpf_map__for_each(map, obj) {
+ bpf_object__for_each_map(map, obj) {
err = apply_obj_config_map(map);
if (err)
return err;
@@ -1527,7 +1514,7 @@ int bpf__apply_obj_config(void)
#define bpf__for_each_map(pos, obj, objtmp) \
bpf_object__for_each_safe(obj, objtmp) \
- bpf_map__for_each(pos, obj)
+ bpf_object__for_each_map(pos, obj)
#define bpf__for_each_map_named(pos, obj, objtmp, name) \
bpf__for_each_map(pos, obj, objtmp) \
@@ -1603,7 +1590,7 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha
op = bpf_map__add_newop(map, NULL);
if (IS_ERR(op))
- return ERR_PTR(PTR_ERR(op));
+ return ERR_CAST(op);
op->op_type = BPF_MAP_OP_SET_EVSEL;
op->v.evsel = evsel;
}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 62d245a90e1d..3f46856e3330 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -8,11 +8,7 @@
#include <linux/compiler.h>
#include <linux/err.h>
-#include <string.h>
#include <bpf/libbpf.h>
-#include "probe-event.h"
-#include "evlist.h"
-#include "debug.h"
enum bpf_loader_errno {
__BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100,
@@ -44,6 +40,7 @@ enum bpf_loader_errno {
};
struct perf_evsel;
+struct perf_evlist;
struct bpf_object;
struct parse_events_term;
#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
@@ -87,6 +84,8 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha
int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size);
#else
#include <errno.h>
+#include <string.h>
+#include "debug.h"
static inline struct bpf_object *
bpf__prepare_load(const char *filename __maybe_unused,
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
new file mode 100644
index 000000000000..eb853ca67cf4
--- /dev/null
+++ b/tools/perf/util/bpf_map.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "util/bpf_map.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/* True when @def describes one of the per-CPU BPF map types listed below. */
+static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
+{
+	switch (def->type) {
+	case BPF_MAP_TYPE_PERCPU_HASH:
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Allocate a buffer for one value of the map described by @def. Per-CPU
+ * maps get value_size rounded up to 8 bytes, once per configured
+ * processor. Returns the malloc() result; the caller frees it.
+ */
+static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
+{
+	size_t bytes = def->value_size;
+
+	if (bpf_map_def__is_per_cpu(def))
+		bytes = round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF);
+
+	return malloc(bytes);
+}
+
+/*
+ * Walk every key of @map and print to @fp, as "[key] = value," lines, the
+ * entries whose value (read as a bool) is true; a failed lookup is printed
+ * as "[key] = ERROR,". Keys are read as int.
+ *
+ * Returns the number of characters printed, or a negative value on error.
+ */
+int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
+{
+	const struct bpf_map_def *def = bpf_map__def(map);
+	void *prev_key = NULL, *key, *value;
+	int fd = bpf_map__fd(map), err;
+	int printed = 0;
+
+	if (fd < 0)
+		return fd;
+
+	if (IS_ERR(def))
+		return PTR_ERR(def);
+
+	err = -ENOMEM;
+	key = malloc(def->key_size);
+	if (key == NULL)
+		goto out;
+
+	value = bpf_map_def__alloc_value(def);
+	if (value == NULL)
+		goto out_free_key;
+
+	/*
+	 * The assignment must be parenthesized: without it err would receive
+	 * the result of the "== 0" comparison rather than the return value.
+	 */
+	while ((err = bpf_map_get_next_key(fd, prev_key, key)) == 0) {
+		int intkey = *(int *)key;
+
+		if (!bpf_map_lookup_elem(fd, key, value)) {
+			bool boolval = *(bool *)value;
+			if (boolval)
+				printed += fprintf(fp, "[%d] = %d,\n", intkey, boolval);
+		} else {
+			printed += fprintf(fp, "[%d] = ERROR,\n", intkey);
+		}
+
+		prev_key = key;
+	}
+
+	/*
+	 * The loop only ends on a failed bpf_map_get_next_key(); ENOENT in
+	 * errno means the last key was reached, i.e. a normal end of walk.
+	 * NOTE(review): relies on libbpf leaving errno set on failure - confirm
+	 * against the libbpf version in use.
+	 */
+	if (errno == ENOENT)
+		err = printed;
+
+	free(value);
+out_free_key:
+	free(key);
+out:
+	return err;
+}
diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h
new file mode 100644
index 000000000000..d6abd5e47af8
--- /dev/null
+++ b/tools/perf/util/bpf_map.h
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#ifndef __PERF_BPF_MAP_H
+#define __PERF_BPF_MAP_H 1
+
+#include <stdio.h>
+#include <linux/compiler.h>
+struct bpf_map;
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+/* Print the entries of @map to @fp; implemented in bpf_map.c. */
+int bpf_map__fprintf(struct bpf_map *map, FILE *fp);
+
+#else
+
+/* Without libbpf support: prints nothing and returns 0. */
+static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused)
+{
+ return 0;
+}
+
+#endif // HAVE_LIBBPF_SUPPORT
+
+#endif // __PERF_BPF_MAP_H
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 1e3c7c5cdc63..64f96b79f1d7 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -1,8 +1,31 @@
#ifndef _PERF_BRANCH_H
#define _PERF_BRANCH_H 1
+#include <stdio.h>
#include <stdint.h>
-#include "../perf.h"
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/*
+ * Per-branch attribute bits packed into a single u64
+ * (1 + 1 + 1 + 1 + 16 + 4 + 40 = 64 bits).
+ * NOTE(review): the layout presumably mirrors the kernel's branch record
+ * flags from <linux/perf_event.h> - confirm before reordering fields.
+ */
+struct branch_flags {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+};
+
+/* One branch record: a from/to pair of u64 values plus its flag bits. */
+struct branch_entry {
+ u64 from;
+ u64 to;
+ struct branch_flags flags;
+};
+
+/*
+ * Variable-length list of branch entries: a count followed by a trailing
+ * zero-length array.
+ * NOTE(review): nr is presumably the number of valid entries[] - confirm.
+ */
+struct branch_stack {
+ u64 nr;
+ struct branch_entry entries[0];
+};
struct branch_type_stat {
bool branch_to;
@@ -13,8 +36,6 @@ struct branch_type_stat {
u64 cross_2m;
};
-struct branch_flags;
-
void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
u64 from, u64 to);
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 04b1d53e4bf9..bff0d17920ed 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -15,6 +15,8 @@
#include <sys/types.h>
#include "build-id.h"
#include "event.h"
+#include "namespaces.h"
+#include "map.h"
#include "symbol.h"
#include "thread.h"
#include <linux/kernel.h>
@@ -363,7 +365,8 @@ int perf_session__write_buildid_table(struct perf_session *session,
if (err)
return err;
- for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
err = machine__write_buildid_table(pos, fd);
if (err)
@@ -396,7 +399,8 @@ int dsos__hit_all(struct perf_session *session)
if (err)
return err;
- for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
err = machine__hit_all_dsos(pos);
@@ -849,7 +853,8 @@ int perf_session__cache_build_ids(struct perf_session *session)
ret = machine__cache_build_ids(&session->machines.host);
- for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret |= machine__cache_build_ids(pos);
}
@@ -866,7 +871,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
struct rb_node *nd;
bool ret = machine__read_build_ids(&session->machines.host, with_hits);
- for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&session->machines.guests); nd;
+ nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret |= machine__read_build_ids(pos, with_hits);
}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index f0c565164a97..93668f38f1ed 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -6,9 +6,10 @@
#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
#include "tool.h"
-#include "namespaces.h"
#include <linux/types.h>
+struct nsinfo;
+
extern struct perf_tool build_id__mark_dso_hit_ops;
struct dso;
struct feat_fd;
diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build
index 988fef1b11d7..613ecfd76527 100644
--- a/tools/perf/util/c++/Build
+++ b/tools/perf/util/c++/Build
@@ -1,2 +1,2 @@
-libperf-$(CONFIG_CLANGLLVM) += clang.o
-libperf-$(CONFIG_CLANGLLVM) += clang-test.o
+perf-$(CONFIG_CLANGLLVM) += clang.o
+perf-$(CONFIG_CLANGLLVM) += clang-test.o
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index 89512504551b..39c0004f2886 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module)
}
PM.run(*Module);
- return std::move(Buffer);
+ return Buffer;
}
}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 32ef7bdca1cf..abb608b09269 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -23,8 +23,10 @@
#include "util.h"
#include "sort.h"
#include "machine.h"
+#include "map.h"
#include "callchain.h"
#include "branch.h"
+#include "symbol.h"
#define CALLCHAIN_PARAM_DEFAULT \
.mode = CHAIN_GRAPH_ABS, \
@@ -766,6 +768,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
cnode->cycles_count += node->branch_flags.cycles;
cnode->iter_count += node->nr_loop_iter;
cnode->iter_cycles += node->iter_cycles;
+ cnode->from_count++;
}
}
@@ -1345,10 +1348,10 @@ static int branch_to_str(char *bf, int bfsize,
static int branch_from_str(char *bf, int bfsize,
u64 branch_count,
u64 cycles_count, u64 iter_count,
- u64 iter_cycles)
+ u64 iter_cycles, u64 from_count)
{
int printed = 0, i = 0;
- u64 cycles;
+ u64 cycles, v = 0;
cycles = cycles_count / branch_count;
if (cycles) {
@@ -1357,14 +1360,16 @@ static int branch_from_str(char *bf, int bfsize,
bf + printed, bfsize - printed);
}
- if (iter_count) {
- printed += count_pri64_printf(i++, "iter",
- iter_count,
- bf + printed, bfsize - printed);
+ if (iter_count && from_count) {
+ v = iter_count / from_count;
+ if (v) {
+ printed += count_pri64_printf(i++, "iter",
+ v, bf + printed, bfsize - printed);
- printed += count_pri64_printf(i++, "avg_cycles",
- iter_cycles / iter_count,
- bf + printed, bfsize - printed);
+ printed += count_pri64_printf(i++, "avg_cycles",
+ iter_cycles / iter_count,
+ bf + printed, bfsize - printed);
+ }
}
if (i)
@@ -1377,6 +1382,7 @@ static int counts_str_build(char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
u64 iter_count, u64 iter_cycles,
+ u64 from_count,
struct branch_type_stat *brtype_stat)
{
int printed;
@@ -1389,7 +1395,8 @@ static int counts_str_build(char *bf, int bfsize,
predicted_count, abort_count, brtype_stat);
} else {
printed = branch_from_str(bf, bfsize, branch_count,
- cycles_count, iter_count, iter_cycles);
+ cycles_count, iter_count, iter_cycles,
+ from_count);
}
if (!printed)
@@ -1402,13 +1409,14 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
u64 branch_count, u64 predicted_count,
u64 abort_count, u64 cycles_count,
u64 iter_count, u64 iter_cycles,
+ u64 from_count,
struct branch_type_stat *brtype_stat)
{
char str[256];
counts_str_build(str, sizeof(str), branch_count,
predicted_count, abort_count, cycles_count,
- iter_count, iter_cycles, brtype_stat);
+ iter_count, iter_cycles, from_count, brtype_stat);
if (fp)
return fprintf(fp, "%s", str);
@@ -1422,6 +1430,7 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
u64 branch_count, predicted_count;
u64 abort_count, cycles_count;
u64 iter_count, iter_cycles;
+ u64 from_count;
branch_count = clist->branch_count;
predicted_count = clist->predicted_count;
@@ -1429,11 +1438,12 @@ int callchain_list_counts__printf_value(struct callchain_list *clist,
cycles_count = clist->cycles_count;
iter_count = clist->iter_count;
iter_cycles = clist->iter_cycles;
+ from_count = clist->from_count;
return callchain_counts_printf(fp, bf, bfsize, branch_count,
predicted_count, abort_count,
cycles_count, iter_count, iter_cycles,
- &clist->brtype_stat);
+ from_count, &clist->brtype_stat);
}
static void free_callchain_node(struct callchain_node *node)
@@ -1569,3 +1579,18 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
return rc;
}
+
+/*
+ * Get a cursor ready for a new set of entries. The node list allocated
+ * earlier is kept for reuse as a cache; only the entry count, the
+ * insertion point and each node's map reference are reset.
+ */
+void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+	struct callchain_cursor_node *pos;
+
+	cursor->nr = 0;
+	cursor->last = &cursor->first;
+
+	pos = cursor->first;
+	while (pos != NULL) {
+		map__zput(pos->map);
+		pos = pos->next;
+	}
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 154560b1eb65..80e056a3d882 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -2,14 +2,14 @@
#ifndef __PERF_CALLCHAIN_H
#define __PERF_CALLCHAIN_H
-#include "../perf.h"
#include <linux/list.h>
#include <linux/rbtree.h>
#include "event.h"
-#include "map.h"
-#include "symbol.h"
+#include "map_symbol.h"
#include "branch.h"
+struct map;
+
#define HELP_PAD "\t\t\t\t"
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
@@ -118,6 +118,7 @@ struct callchain_list {
bool has_children;
};
u64 branch_count;
+ u64 from_count;
u64 predicted_count;
u64 abort_count;
u64 cycles_count;
@@ -187,20 +188,7 @@ int callchain_append(struct callchain_root *root,
int callchain_merge(struct callchain_cursor *cursor,
struct callchain_root *dst, struct callchain_root *src);
-/*
- * Initialize a cursor before adding entries inside, but keep
- * the previously allocated entries as a cache.
- */
-static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
-{
- struct callchain_cursor_node *node;
-
- cursor->nr = 0;
- cursor->last = &cursor->first;
-
- for (node = cursor->first; node != NULL; node = node->next)
- map__zput(node->map);
-}
+void callchain_cursor_reset(struct callchain_cursor *cursor);
int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
struct map *map, struct symbol *sym,
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 39e628b8938e..39b8c4ec4e2e 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include "cache.h"
-#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include "color.h"
@@ -10,44 +9,6 @@
int perf_use_color_default = -1;
-int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
-{
- if (value) {
- if (!strcasecmp(value, "never"))
- return 0;
- if (!strcasecmp(value, "always"))
- return 1;
- if (!strcasecmp(value, "auto"))
- goto auto_color;
- }
-
- /* Missing or explicit false to turn off colorization */
- if (!perf_config_bool(var, value))
- return 0;
-
- /* any normal truth value defaults to 'auto' */
- auto_color:
- if (stdout_is_tty < 0)
- stdout_is_tty = isatty(1);
- if (stdout_is_tty || pager_in_use()) {
- char *term = getenv("TERM");
- if (term && strcmp(term, "dumb"))
- return 1;
- }
- return 0;
-}
-
-int perf_color_default_config(const char *var, const char *value,
- void *cb __maybe_unused)
-{
- if (!strcmp(var, "color.ui")) {
- perf_use_color_default = perf_config_colorbool(var, value, -1);
- return 0;
- }
-
- return 0;
-}
-
static int __color_vsnprintf(char *bf, size_t size, const char *color,
const char *fmt, va_list args, const char *trail)
{
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 22777b1812ee..01f7bed21c9b 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -3,6 +3,7 @@
#define __PERF_COLOR_H
#include <stdio.h>
+#include <stdarg.h>
/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
#define COLOR_MAXLEN 24
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
new file mode 100644
index 000000000000..817dc56e7e95
--- /dev/null
+++ b/tools/perf/util/color_config.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "cache.h"
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+/*
+ * Turn a color configuration value into 0 (no color) or 1 (color).
+ *
+ * "never" and "always" are answered directly. "auto", or any value that
+ * perf_config_bool() treats as true, enables color only when stdout is a
+ * tty (or a pager is in use) and $TERM is set to something other than
+ * "dumb". A negative @stdout_is_tty means "probe with isatty(1)".
+ */
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+	int explicit_auto = 0;
+	const char *term;
+
+	if (value != NULL) {
+		if (strcasecmp(value, "never") == 0)
+			return 0;
+		if (strcasecmp(value, "always") == 0)
+			return 1;
+		explicit_auto = strcasecmp(value, "auto") == 0;
+	}
+
+	/* Missing or explicit false to turn off colorization */
+	if (!explicit_auto && !perf_config_bool(var, value))
+		return 0;
+
+	/* any normal truth value defaults to 'auto' */
+	if (stdout_is_tty < 0)
+		stdout_is_tty = isatty(1);
+	if (!stdout_is_tty && !pager_in_use())
+		return 0;
+
+	term = getenv("TERM");
+	if (term == NULL)
+		return 0;
+
+	return strcmp(term, "dumb") != 0;
+}
+
+/*
+ * Config callback: when @var is "color.ui", store the resolved color
+ * setting in perf_use_color_default. Every other variable is ignored.
+ * Always returns 0.
+ */
+int perf_color_default_config(const char *var, const char *value,
+			      void *cb __maybe_unused)
+{
+	if (strcmp(var, "color.ui") == 0)
+		perf_use_color_default = perf_config_colorbool(var, value, -1);
+
+	return 0;
+}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 31279a7bd919..1066de92af12 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -6,6 +6,7 @@
#include <stdio.h>
#include <string.h>
#include <linux/refcount.h>
+#include <linux/rbtree.h>
#include "rwsem.h"
struct comm_str {
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
index 3e5c438fe85e..f35d8fbfa2dd 100644
--- a/tools/perf/util/comm.h
+++ b/tools/perf/util/comm.h
@@ -2,9 +2,9 @@
#ifndef __PERF_COMM_H
#define __PERF_COMM_H
-#include "../perf.h"
-#include <linux/rbtree.h>
#include <linux/list.h>
+#include <linux/types.h>
+#include <stdbool.h>
struct comm_str;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 5ac157056cdf..fa092511c52b 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -13,7 +13,9 @@
#include <sys/param.h>
#include "util.h"
#include "cache.h"
+#include "callchain.h"
#include <subcmd/exec-cmd.h>
+#include "util/event.h" /* proc_map_timeout */
#include "util/hist.h" /* perf_hist_config */
#include "util/llvm-utils.h" /* perf_llvm_config */
#include "config.h"
@@ -419,6 +421,9 @@ static int perf_buildid_config(const char *var, const char *value)
static int perf_default_core_config(const char *var __maybe_unused,
const char *value __maybe_unused)
{
+ if (!strcmp(var, "core.proc-map-timeout"))
+ proc_map_timeout = strtoul(value, NULL, 10);
+
/* Add other config variables here. */
return 0;
}
@@ -811,14 +816,14 @@ int config_error_nonbool(const char *var)
void set_buildid_dir(const char *dir)
{
if (dir)
- scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
+ scnprintf(buildid_dir, MAXPATHLEN, "%s", dir);
/* default to $HOME/.debug */
if (buildid_dir[0] == '\0') {
char *home = getenv("HOME");
if (home) {
- snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
+ snprintf(buildid_dir, MAXPATHLEN, "%s/%s",
home, DEBUG_CACHE_DIR);
} else {
strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h
new file mode 100644
index 000000000000..8cf4e40d322a
--- /dev/null
+++ b/tools/perf/util/cpu-set-sched.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Definitions taken from glibc for use with older systems, same licensing.
+#ifndef _CPU_SET_SCHED_PERF_H
+#define _CPU_SET_SCHED_PERF_H
+
+#include <features.h>
+#include <sched.h>
+
+/*
+ * Fallback CPU_EQUAL(): defined only when <sched.h> did not already
+ * provide it. Evaluates to non-zero when the two cpu_set_t objects hold
+ * the same bits.
+ */
+#ifndef CPU_EQUAL
+#ifndef __CPU_EQUAL_S
+/* Prefer the memcmp builtin when the compiler is recent enough. */
+#if __GNUC_PREREQ (2, 91)
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0)
+#else
+/* Otherwise compare the masks word by word; equal if no word differed. */
+# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
+ (__extension__ \
+ ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits; \
+ const __cpu_mask *__arr2 = (cpusetp2)->__bits; \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ if (__arr1[__i] != __arr2[__i]) \
+ break; \
+ __i == __imax; }))
+#endif
+#endif // __CPU_EQUAL_S
+
+#define CPU_EQUAL(cpusetp1, cpusetp2) \
+ __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2)
+#endif // CPU_EQUAL
+
+/*
+ * Fallback CPU_OR(): defined only when <sched.h> did not already provide
+ * it. Stores the bitwise OR of srcset1 and srcset2 into destset.
+ */
+#ifndef CPU_OR
+#ifndef __CPU_OP_S
+/*
+ * Generic word-by-word combiner: applies the binary operator "op" to each
+ * pair of mask words and evaluates to the destination pointer.
+ */
+#define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \
+ (__extension__ \
+ ({ cpu_set_t *__dest = (destset); \
+ const __cpu_mask *__arr1 = (srcset1)->__bits; \
+ const __cpu_mask *__arr2 = (srcset2)->__bits; \
+ size_t __imax = (setsize) / sizeof (__cpu_mask); \
+ size_t __i; \
+ for (__i = 0; __i < __imax; ++__i) \
+ ((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i]; \
+ __dest; }))
+#endif // __CPU_OP_S
+
+#define CPU_OR(destset, srcset1, srcset2) \
+ __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |)
+#endif // CPU_OR
+
+#endif // _CPU_SET_SCHED_PERF_H
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 1ccbd3342069..0b599229bc7e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
if (!cpu_list)
return cpu_map__read_all_cpu_map();
- if (!isdigit(*cpu_list))
+ /*
+ * must handle the case of empty cpumap to cover
+ * TOPOLOGY header for NUMA nodes with no CPU
+ * ( e.g., because of CPU hotplug)
+ */
+ if (!isdigit(*cpu_list) && *cpu_list != '\0')
goto out;
while (isdigit(*cpu_list)) {
@@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
if (nr_cpus > 0)
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else
+ else if (*cpu_list != '\0')
cpus = cpu_map__default_new();
+ else
+ cpus = cpu_map__dummy_new();
invalid:
free(tmp_cpus);
out:
@@ -674,7 +681,7 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size)
#undef COMMA
- pr_debug("cpumask list: %s\n", buf);
+ pr_debug2("cpumask list: %s\n", buf);
return ret;
}
@@ -723,3 +730,13 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size)
buf[size - 1] = '\0';
return ptr - buf;
}
+
+/*
+ * Return a lazily created cpu map cached in a function-local static.
+ * The map is built by cpu_map__new(NULL) on first use and the same
+ * pointer is handed out on later calls. Not thread safe.
+ */
+const struct cpu_map *cpu_map__online(void) /* thread unsafe */
+{
+	static const struct cpu_map *online;
+
+	if (online == NULL) {
+		/* from /sys/devices/system/cpu/online */
+		online = cpu_map__new(NULL);
+	}
+
+	return online;
+}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index ed8999d1a640..f00ce624b9f7 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -29,6 +29,7 @@ int cpu_map__get_core_id(int cpu);
int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
+const struct cpu_map *cpu_map__online(void); /* thread unsafe */
struct cpu_map *cpu_map__get(struct cpu_map *map);
void cpu_map__put(struct cpu_map *map);
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
new file mode 100644
index 000000000000..ece0710249d4
--- /dev/null
+++ b/tools/perf/util/cputopo.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/param.h>
+#include <inttypes.h>
+#include <api/fs/fs.h>
+
+#include "cputopo.h"
+#include "cpumap.h"
+#include "util.h"
+#include "env.h"
+
+
+#define CORE_SIB_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define THRD_SIB_FMT \
+ "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+#define NODE_ONLINE_FMT \
+ "%s/devices/system/node/online"
+#define NODE_MEMINFO_FMT \
+ "%s/devices/system/node/node%d/meminfo"
+#define NODE_CPULIST_FMT \
+ "%s/devices/system/node/node%d/cpulist"
+
+/*
+ * Read the first line of @path, strip its trailing newline and append it to
+ * @list (currently holding *@nr strings) unless an identical string is
+ * already stored there.
+ *
+ * Returns 0 when a line was read (whether or not it was new), -1 when the
+ * file could not be opened or no line could be read.
+ */
+static int sibling_list__add(const char *path, char **list, u32 *nr)
+{
+	char *line = NULL, *newline;
+	size_t cap = 0;
+	ssize_t nread;
+	FILE *file;
+	u32 idx;
+
+	file = fopen(path, "r");
+	if (file == NULL)
+		return -1;
+
+	nread = getline(&line, &cap, file);
+	fclose(file);
+	if (nread <= 0) {
+		/* getline() may have allocated even though it failed */
+		free(line);
+		return -1;
+	}
+
+	newline = strchr(line, '\n');
+	if (newline != NULL)
+		*newline = '\0';
+
+	for (idx = 0; idx < *nr; idx++) {
+		if (strcmp(line, list[idx]) == 0) {
+			/* already recorded for an earlier CPU */
+			free(line);
+			return 0;
+		}
+	}
+
+	/* new entry: the list takes ownership of the string */
+	list[*nr] = line;
+	(*nr)++;
+	return 0;
+}
+
+/*
+ * Record the core and thread sibling lists of @cpu in @tp, skipping lists
+ * that are already present.
+ *
+ * Returns 0 if at least one of the two sysfs files could be read, -1 if
+ * neither could.
+ */
+static int build_cpu_topology(struct cpu_topology *tp, int cpu)
+{
+	char filename[MAXPATHLEN];
+	int core_ret, thread_ret;
+
+	scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT,
+		  sysfs__mountpoint(), cpu);
+	core_ret = sibling_list__add(filename, tp->core_siblings,
+				     &tp->core_sib);
+
+	scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
+		  sysfs__mountpoint(), cpu);
+	thread_ret = sibling_list__add(filename, tp->thread_siblings,
+				       &tp->thread_sib);
+
+	if (core_ret == 0 || thread_ret == 0)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * Free every sibling string recorded in @tp and then @tp itself.
+ * The two pointer arrays are not freed separately: cpu_topology__new()
+ * carves them out of the same allocation as @tp. A NULL @tp is a no-op.
+ */
+void cpu_topology__delete(struct cpu_topology *tp)
+{
+	u32 idx;
+
+	if (tp == NULL)
+		return;
+
+	idx = 0;
+	while (idx < tp->core_sib) {
+		zfree(&tp->core_siblings[idx]);
+		idx++;
+	}
+
+	idx = 0;
+	while (idx < tp->thread_sib) {
+		zfree(&tp->thread_siblings[idx]);
+		idx++;
+	}
+
+	free(tp);
+}
+
+/*
+ * Allocate a cpu_topology and fill it by calling build_cpu_topology() for
+ * every CPU index below cpu__max_present_cpu() that is present in the map
+ * returned by cpu_map__new(NULL).
+ *
+ * The structure and both string-pointer arrays live in a single zeroed
+ * allocation: header first, then the core array, then the thread array.
+ *
+ * Returns the new topology, or NULL if the CPU map or the allocation could
+ * not be obtained, if no CPU was processed, or if the last processed CPU
+ * failed.
+ */
+struct cpu_topology *cpu_topology__new(void)
+{
+	struct cpu_topology *topo = NULL;
+	struct cpu_map *online;
+	size_t list_bytes;
+	long ncpus;
+	int err = -1;
+	u32 nr_cpus, cpu;
+
+	ncpus = cpu__max_present_cpu();
+
+	/* build online CPU map */
+	online = cpu_map__new(NULL);
+	if (online == NULL) {
+		pr_debug("failed to get system cpumap\n");
+		return NULL;
+	}
+
+	nr_cpus = (u32)(ncpus & UINT_MAX);
+	list_bytes = nr_cpus * sizeof(char *);
+
+	topo = calloc(1, sizeof(*topo) + 2 * list_bytes);
+	if (topo == NULL)
+		goto out_put;
+
+	/* the two arrays sit back to back right after the header */
+	topo->core_siblings = (char **)(topo + 1);
+	topo->thread_siblings = topo->core_siblings + nr_cpus;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		if (cpu_map__has(online, cpu)) {
+			err = build_cpu_topology(topo, cpu);
+			if (err < 0)
+				break;
+		}
+	}
+
+out_put:
+	cpu_map__put(online);
+	if (err != 0) {
+		cpu_topology__delete(topo);
+		return NULL;
+	}
+
+	return topo;
+}
+
+/*
+ * Fill @node for NUMA node number @nr from sysfs: the MemTotal/MemFree
+ * values from the node's meminfo file and the CPU list string from its
+ * cpulist file.
+ *
+ * On success returns 0 and node->cpus points to a heap string with the
+ * newline stripped; numa_topology__delete() frees it. Returns -1 if either
+ * file cannot be opened, a meminfo line containing ':' cannot be parsed, or
+ * the cpulist line cannot be read; node->cpus is not assigned in that case.
+ */
+static int load_numa_node(struct numa_topology_node *node, int nr)
+{
+	char str[MAXPATHLEN];
+	char field[32];
+	char *buf = NULL, *p;
+	size_t len = 0;
+	int ret = -1;
+	FILE *fp;
+	u64 mem;
+
+	node->node = (u32) nr;
+
+	scnprintf(str, MAXPATHLEN, NODE_MEMINFO_FMT,
+		  sysfs__mountpoint(), nr);
+	fp = fopen(str, "r");
+	if (!fp)
+		return -1;
+
+	while (getline(&buf, &len, fp) > 0) {
+		/* skip over invalid lines */
+		if (!strchr(buf, ':'))
+			continue;
+		if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
+			goto err;
+		if (!strcmp(field, "MemTotal:"))
+			node->mem_total = mem;
+		if (!strcmp(field, "MemFree:"))
+			node->mem_free = mem;
+		/* stop reading once both values are non-zero */
+		if (node->mem_total && node->mem_free)
+			break;
+	}
+
+	fclose(fp);
+	fp = NULL;
+
+	scnprintf(str, MAXPATHLEN, NODE_CPULIST_FMT,
+		  sysfs__mountpoint(), nr);
+
+	fp = fopen(str, "r");
+	if (!fp)
+		goto err;	/* buf may still hold a meminfo line: free it */
+
+	/* getline() reuses (and may grow) the buffer from the loop above */
+	if (getline(&buf, &len, fp) <= 0)
+		goto err;
+
+	p = strchr(buf, '\n');
+	if (p)
+		*p = '\0';
+
+	/* ownership of buf moves to the node */
+	node->cpus = buf;
+	fclose(fp);
+	return 0;
+
+err:
+	free(buf);
+	if (fp)
+		fclose(fp);
+	return ret;
+}
+
+/*
+ * Build a numa_topology from sysfs: parse the node "online" list with
+ * cpu_map__new() and load one numa_topology_node per listed node.
+ *
+ * Returns the new topology, or NULL if the online file cannot be opened or
+ * read, the list cannot be parsed, allocation fails, or any node fails to
+ * load.
+ */
+struct numa_topology *numa_topology__new(void)
+{
+	struct numa_topology *topo = NULL;
+	struct cpu_map *nodes = NULL;
+	char online_path[MAXPATHLEN];
+	char *line = NULL, *newline;
+	size_t cap = 0;
+	u32 count, idx;
+	FILE *file;
+
+	scnprintf(online_path, MAXPATHLEN, NODE_ONLINE_FMT,
+		  sysfs__mountpoint());
+
+	file = fopen(online_path, "r");
+	if (file == NULL)
+		return NULL;
+
+	if (getline(&line, &cap, file) <= 0)
+		goto cleanup;
+
+	newline = strchr(line, '\n');
+	if (newline != NULL)
+		*newline = '\0';
+
+	/* the node list has the same syntax as a CPU list */
+	nodes = cpu_map__new(line);
+	if (nodes == NULL)
+		goto cleanup;
+
+	count = (u32) nodes->nr;
+
+	topo = zalloc(sizeof(*topo) + count * sizeof(topo->nodes[0]));
+	if (topo == NULL)
+		goto cleanup;
+
+	topo->nr = count;
+
+	for (idx = 0; idx < count; idx++) {
+		if (load_numa_node(&topo->nodes[idx], nodes->map[idx]) == 0)
+			continue;
+
+		/* one bad node invalidates the whole topology */
+		numa_topology__delete(topo);
+		topo = NULL;
+		break;
+	}
+
+cleanup:
+	free(line);
+	fclose(file);
+	cpu_map__put(nodes);
+	return topo;
+}
+
+/*
+ * Free the per-node CPU list strings of @tp and then @tp itself.
+ * A NULL @tp is accepted and ignored, matching cpu_topology__delete(), so
+ * callers can pass the result of a failed numa_topology__new() directly.
+ */
+void numa_topology__delete(struct numa_topology *tp)
+{
+	u32 i;
+
+	if (!tp)
+		return;
+
+	for (i = 0; i < tp->nr; i++)
+		free(tp->nodes[i].cpus);
+
+	free(tp);
+}
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
new file mode 100644
index 000000000000..47a97e71acdf
--- /dev/null
+++ b/tools/perf/util/cputopo.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUTOPO_H
+#define __PERF_CPUTOPO_H
+
+#include <linux/types.h>
+#include "env.h"
+
+struct cpu_topology { /* de-duplicated sibling-list strings read from sysfs */
+ u32 core_sib; /* number of strings stored in core_siblings */
+ u32 thread_sib; /* number of strings stored in thread_siblings */
+ char **core_siblings; /* distinct core_siblings_list lines, newline stripped */
+ char **thread_siblings; /* distinct thread_siblings_list lines, newline stripped */
+};
+
+struct numa_topology_node { /* one NUMA node as read from sysfs */
+ char *cpus; /* heap copy of the node's cpulist line, newline stripped */
+ u32 node; /* node number */
+ u64 mem_total; /* value parsed from the "MemTotal:" meminfo line */
+ u64 mem_free; /* value parsed from the "MemFree:" meminfo line */
+};
+
+/*
+ * Variable-length container: @nr node entries follow the header in the
+ * same allocation. Uses a C99 flexible array member instead of the GNU
+ * zero-length array; sizeof(struct numa_topology) and nodes[i] indexing
+ * are unaffected.
+ */
+struct numa_topology {
+	u32 nr;
+	struct numa_topology_node nodes[];
+};
+
+struct cpu_topology *cpu_topology__new(void);
+void cpu_topology__delete(struct cpu_topology *tp);
+
+struct numa_topology *numa_topology__new(void);
+void numa_topology__delete(struct numa_topology *tp);
+
+#endif /* __PERF_CPUTOPO_H */
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
index bc22c39c727f..216cb17a3322 100644
--- a/tools/perf/util/cs-etm-decoder/Build
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -1 +1 @@
-libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
+perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 938def6d0bb9..ba4c623cd8de 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -36,7 +36,6 @@
struct cs_etm_decoder {
void *data;
void (*packet_printer)(const char *msg);
- bool trace_on;
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
ocsd_datapath_resp_t prev_return;
@@ -116,6 +115,19 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
return 1;
}
+/*
+ * Populate an OpenCSD ETMv3 configuration from the ETMv3 register values
+ * held in @params. Architecture and core profile are fixed to ARCH_V7 and
+ * profile_CortexA. Always returns 0.
+ */
+static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
+					    ocsd_etmv3_cfg *config)
+{
+	const struct cs_etmv3_trace_params *regs = &params->etmv3;
+
+	/* constant for every ETMv3/PTM configuration built here */
+	config->arch_ver = ARCH_V7;
+	config->core_prof = profile_CortexA;
+
+	/* register values carried over unchanged */
+	config->reg_ctrl = regs->reg_ctrl;
+	config->reg_trc_id = regs->reg_trc_id;
+	config->reg_ccer = regs->reg_ccer;
+	config->reg_idr = regs->reg_idr;
+
+	return 0;
+}
+
static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
ocsd_etmv4_cfg *config)
{
@@ -237,10 +249,19 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
struct cs_etm_decoder *decoder)
{
const char *decoder_name;
+ ocsd_etmv3_cfg config_etmv3;
ocsd_etmv4_cfg trace_config_etmv4;
void *trace_config;
switch (t_params->protocol) {
+ case CS_ETM_PROTO_ETMV3:
+ case CS_ETM_PROTO_PTM:
+ cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
+ decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
+ OCSD_BUILTIN_DCD_ETMV3 :
+ OCSD_BUILTIN_DCD_PTM;
+ trace_config = &config_etmv3;
+ break;
case CS_ETM_PROTO_ETMV4i:
cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
@@ -263,11 +284,18 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
decoder->tail = 0;
decoder->packet_count = 0;
for (i = 0; i < MAX_BUFFER; i++) {
+ decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[i].instr_count = 0;
decoder->packet_buffer[i].last_instr_taken_branch = false;
- decoder->packet_buffer[i].exc = false;
- decoder->packet_buffer[i].exc_ret = false;
+ decoder->packet_buffer[i].last_instr_size = 0;
+ decoder->packet_buffer[i].last_instr_type = 0;
+ decoder->packet_buffer[i].last_instr_subtype = 0;
+ decoder->packet_buffer[i].last_instr_cond = 0;
+ decoder->packet_buffer[i].flags = 0;
+ decoder->packet_buffer[i].exception_number = UINT32_MAX;
+ decoder->packet_buffer[i].trace_chan_id = UINT8_MAX;
decoder->packet_buffer[i].cpu = INT_MIN;
}
}
@@ -278,14 +306,12 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
enum cs_etm_sample_type sample_type)
{
u32 et = 0;
- struct int_node *inode = NULL;
+ int cpu;
if (decoder->packet_count >= MAX_BUFFER - 1)
return OCSD_RESP_FATAL_SYS_ERR;
- /* Search the RB tree for the cpu associated with this traceID */
- inode = intlist__find(traceid_list, trace_chan_id);
- if (!inode)
+ if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
return OCSD_RESP_FATAL_SYS_ERR;
et = decoder->tail;
@@ -294,11 +320,19 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
decoder->packet_count++;
decoder->packet_buffer[et].sample_type = sample_type;
- decoder->packet_buffer[et].exc = false;
- decoder->packet_buffer[et].exc_ret = false;
- decoder->packet_buffer[et].cpu = *((int *)inode->priv);
+ decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
+ decoder->packet_buffer[et].cpu = cpu;
decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[et].instr_count = 0;
+ decoder->packet_buffer[et].last_instr_taken_branch = false;
+ decoder->packet_buffer[et].last_instr_size = 0;
+ decoder->packet_buffer[et].last_instr_type = 0;
+ decoder->packet_buffer[et].last_instr_subtype = 0;
+ decoder->packet_buffer[et].last_instr_cond = 0;
+ decoder->packet_buffer[et].flags = 0;
+ decoder->packet_buffer[et].exception_number = UINT32_MAX;
+ decoder->packet_buffer[et].trace_chan_id = trace_chan_id;
if (decoder->packet_count == MAX_BUFFER - 1)
return OCSD_RESP_WAIT;
@@ -321,8 +355,31 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
packet = &decoder->packet_buffer[decoder->tail];
+ switch (elem->isa) {
+ case ocsd_isa_aarch64:
+ packet->isa = CS_ETM_ISA_A64;
+ break;
+ case ocsd_isa_arm:
+ packet->isa = CS_ETM_ISA_A32;
+ break;
+ case ocsd_isa_thumb2:
+ packet->isa = CS_ETM_ISA_T32;
+ break;
+ case ocsd_isa_tee:
+ case ocsd_isa_jazelle:
+ case ocsd_isa_custom:
+ case ocsd_isa_unknown:
+ default:
+ packet->isa = CS_ETM_ISA_UNKNOWN;
+ }
+
packet->start_addr = elem->st_addr;
packet->end_addr = elem->en_addr;
+ packet->instr_count = elem->num_instr_range;
+ packet->last_instr_type = elem->last_i_type;
+ packet->last_instr_subtype = elem->last_i_subtype;
+ packet->last_instr_cond = elem->last_instr_cond;
+
switch (elem->last_i_type) {
case OCSD_INSTR_BR:
case OCSD_INSTR_BR_INDIRECT:
@@ -336,15 +393,43 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
break;
}
+ packet->last_instr_size = elem->last_instr_sz;
+
+ return ret;
+}
+
+/*
+ * Queue a CS_ETM_DISCONTINUITY packet for @trace_chan_id and hand back the
+ * datapath response of cs_etm_decoder__buffer_packet().
+ */
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder,
+				     const uint8_t trace_chan_id)
+{
+	const enum cs_etm_sample_type type = CS_ETM_DISCONTINUITY;
+
+	return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, type);
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{ int ret = 0;
+ struct cs_etm_packet *packet;
+
+ ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ CS_ETM_EXCEPTION);
+ if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+ return ret;
+
+ packet = &decoder->packet_buffer[decoder->tail];
+ packet->exception_number = elem->exception_number;
+
return ret;
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder,
- const uint8_t trace_chan_id)
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder,
+ const uint8_t trace_chan_id)
{
return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
- CS_ETM_TRACE_ON);
+ CS_ETM_EXCEPTION_RET);
}
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
@@ -359,26 +444,25 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
switch (elem->elem_type) {
case OCSD_GEN_TRC_ELEM_UNKNOWN:
break;
+ case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_NO_SYNC:
- decoder->trace_on = false;
- break;
case OCSD_GEN_TRC_ELEM_TRACE_ON:
- resp = cs_etm_decoder__buffer_trace_on(decoder,
- trace_chan_id);
- decoder->trace_on = true;
+ resp = cs_etm_decoder__buffer_discontinuity(decoder,
+ trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
resp = cs_etm_decoder__buffer_range(decoder, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
- decoder->packet_buffer[decoder->tail].exc = true;
+ resp = cs_etm_decoder__buffer_exception(decoder, elem,
+ trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
- decoder->packet_buffer[decoder->tail].exc_ret = true;
+ resp = cs_etm_decoder__buffer_exception_ret(decoder,
+ trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
- case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_ADDR_NACC:
case OCSD_GEN_TRC_ELEM_TIMESTAMP:
case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
@@ -398,11 +482,20 @@ static int cs_etm_decoder__create_etm_packet_decoder(
struct cs_etm_decoder *decoder)
{
const char *decoder_name;
+ ocsd_etmv3_cfg config_etmv3;
ocsd_etmv4_cfg trace_config_etmv4;
void *trace_config;
u8 csid;
switch (t_params->protocol) {
+ case CS_ETM_PROTO_ETMV3:
+ case CS_ETM_PROTO_PTM:
+ cs_etm_decoder__gen_etmv3_config(t_params, &config_etmv3);
+ decoder_name = (t_params->protocol == CS_ETM_PROTO_ETMV3) ?
+ OCSD_BUILTIN_DCD_ETMV3 :
+ OCSD_BUILTIN_DCD_PTM;
+ trace_config = &config_etmv3;
+ break;
case CS_ETM_PROTO_ETMV4i:
cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 612b5755f742..3ab11dfa92ae 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -15,26 +15,35 @@
struct cs_etm_decoder;
-struct cs_etm_buffer {
- const unsigned char *buf;
- size_t len;
- u64 offset;
- u64 ref_timestamp;
+enum cs_etm_sample_type {
+ CS_ETM_EMPTY,
+ CS_ETM_RANGE,
+ CS_ETM_DISCONTINUITY,
+ CS_ETM_EXCEPTION,
+ CS_ETM_EXCEPTION_RET,
};
-enum cs_etm_sample_type {
- CS_ETM_EMPTY = 0,
- CS_ETM_RANGE = 1 << 0,
- CS_ETM_TRACE_ON = 1 << 1,
+enum cs_etm_isa {
+ CS_ETM_ISA_UNKNOWN,
+ CS_ETM_ISA_A64,
+ CS_ETM_ISA_A32,
+ CS_ETM_ISA_T32,
};
struct cs_etm_packet {
enum cs_etm_sample_type sample_type;
+ enum cs_etm_isa isa;
u64 start_addr;
u64 end_addr;
+ u32 instr_count;
+ u32 last_instr_type;
+ u32 last_instr_subtype;
+ u32 flags;
+ u32 exception_number;
+ u8 last_instr_cond;
u8 last_instr_taken_branch;
- u8 exc;
- u8 exc_ret;
+ u8 last_instr_size;
+ u8 trace_chan_id;
int cpu;
};
@@ -43,6 +52,13 @@ struct cs_etm_queue;
typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64,
size_t, u8 *);
+struct cs_etmv3_trace_params { /* register values copied into ocsd_etmv3_cfg */
+ u32 reg_ctrl; /* filled from the CS_ETM_ETMCR metadata slot */
+ u32 reg_trc_id; /* filled from the CS_ETM_ETMTRACEIDR metadata slot */
+ u32 reg_ccer;
+ u32 reg_idr;
+};
+
struct cs_etmv4_trace_params {
u32 reg_idr0;
u32 reg_idr1;
@@ -55,6 +71,7 @@ struct cs_etmv4_trace_params {
struct cs_etm_trace_params {
int protocol;
union {
+ struct cs_etmv3_trace_params etmv3;
struct cs_etmv4_trace_params etmv4;
};
};
@@ -78,11 +95,13 @@ enum {
CS_ETM_PROTO_ETMV3 = 1,
CS_ETM_PROTO_ETMV4i,
CS_ETM_PROTO_ETMV4d,
+ CS_ETM_PROTO_PTM,
};
-enum {
+enum cs_etm_decoder_operation {
CS_ETM_OPERATION_PRINT = 1,
CS_ETM_OPERATION_DECODE,
+ CS_ETM_OPERATION_MAX,
};
int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 2ae640257fdb..110804936fc3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -12,6 +12,7 @@
#include <linux/log2.h>
#include <linux/types.h>
+#include <opencsd/ocsd_if_types.h>
#include <stdlib.h>
#include "auxtrace.h"
@@ -24,6 +25,7 @@
#include "machine.h"
#include "map.h"
#include "perf.h"
+#include "symbol.h"
#include "thread.h"
#include "thread_map.h"
#include "thread-stack.h"
@@ -31,14 +33,6 @@
#define MAX_TIMESTAMP (~0ULL)
-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
struct cs_etm_auxtrace {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -71,13 +65,10 @@ struct cs_etm_queue {
struct thread *thread;
struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer;
- const struct cs_etm_state *state;
union perf_event *event_buf;
unsigned int queue_nr;
pid_t pid, tid;
int cpu;
- u64 time;
- u64 timestamp;
u64 offset;
u64 period_instructions;
struct branch_stack *last_branch;
@@ -85,11 +76,54 @@ struct cs_etm_queue {
size_t last_branch_pos;
struct cs_etm_packet *prev_packet;
struct cs_etm_packet *packet;
+ const unsigned char *buf;
+ size_t buf_len, buf_used;
};
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
- pid_t tid, u64 time_);
+ pid_t tid);
+
+/* PTMs ETMIDR [11:8] set to b0011 */
+#define ETMIDR_PTM_VERSION 0x00000300
+
+/*
+ * Classify an ETMIDR value: CS_ETM_PROTO_PTM when every bit of
+ * ETMIDR_PTM_VERSION is set in @etmidr, CS_ETM_PROTO_ETMV3 otherwise.
+ */
+static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
+{
+	const u32 version_bits = etmidr & ETMIDR_PTM_VERSION;
+
+	return (version_bits == ETMIDR_PTM_VERSION) ? CS_ETM_PROTO_PTM
+						    : CS_ETM_PROTO_ETMV3;
+}
+
+/*
+ * Look up @trace_chan_id in traceid_list and store the CS_ETM_MAGIC entry
+ * of its metadata array in *@magic.
+ *
+ * Returns 0 on success, -EINVAL if the trace ID is not in the list.
+ */
+static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+{
+	struct int_node *node = intlist__find(traceid_list, trace_chan_id);
+	u64 *regs;
+
+	if (node == NULL)
+		return -EINVAL;
+
+	/* the node's private data is the metadata array */
+	regs = node->priv;
+	*magic = regs[CS_ETM_MAGIC];
+
+	return 0;
+}
+
+/*
+ * Look up @trace_chan_id in traceid_list and store the CS_ETM_CPU entry of
+ * its metadata array, converted to int, in *@cpu.
+ *
+ * Returns 0 on success, -EINVAL if the trace ID is not in the list.
+ */
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+{
+	struct int_node *node = intlist__find(traceid_list, trace_chan_id);
+	u64 *regs;
+
+	if (node == NULL)
+		return -EINVAL;
+
+	/* the node's private data is the metadata array */
+	regs = node->priv;
+	*cpu = (int)regs[CS_ETM_CPU];
+
+	return 0;
+}
static void cs_etm__packet_dump(const char *pkt_string)
{
@@ -104,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string)
fflush(stdout);
}
+/*
+ * Fill t_params[@idx] for an ETMv3/PTM tracer from metadata row @idx:
+ * the protocol is derived from @etmidr, the control and trace ID registers
+ * are copied from the metadata. Other etmv3 fields are left untouched.
+ */
+static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_auxtrace *etm, int idx,
+					  u32 etmidr)
+{
+	struct cs_etm_trace_params *params = &t_params[idx];
+	u64 *regs = etm->metadata[idx];
+
+	params->etmv3.reg_trc_id = regs[CS_ETM_ETMTRACEIDR];
+	params->etmv3.reg_ctrl = regs[CS_ETM_ETMCR];
+	params->protocol = cs_etm__get_v7_protocol_version(etmidr);
+}
+
+/*
+ * Fill t_params[@idx] for an ETMv4 tracer: protocol CS_ETM_PROTO_ETMV4i
+ * plus the ID, configuration and trace ID registers copied from metadata
+ * row @idx.
+ */
+static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
+					  struct cs_etm_auxtrace *etm, int idx)
+{
+	struct cs_etm_trace_params *params = &t_params[idx];
+	u64 *regs = etm->metadata[idx];
+
+	params->protocol = CS_ETM_PROTO_ETMV4i;
+
+	/* ID registers */
+	params->etmv4.reg_idr0 = regs[CS_ETMV4_TRCIDR0];
+	params->etmv4.reg_idr1 = regs[CS_ETMV4_TRCIDR1];
+	params->etmv4.reg_idr2 = regs[CS_ETMV4_TRCIDR2];
+	params->etmv4.reg_idr8 = regs[CS_ETMV4_TRCIDR8];
+
+	/* configuration and trace ID registers */
+	params->etmv4.reg_configr = regs[CS_ETMV4_TRCCONFIGR];
+	params->etmv4.reg_traceidr = regs[CS_ETMV4_TRCTRACEIDR];
+}
+
+/*
+ * Fill one t_params entry per CPU in @etm, choosing the ETMv3/PTM or ETMv4
+ * setter from the magic number stored in that CPU's metadata.
+ *
+ * Returns 0 on success, -EINVAL at the first CPU whose magic number is
+ * neither __perf_cs_etmv3_magic nor __perf_cs_etmv4_magic (entries filled
+ * before that point stay filled).
+ */
+static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
+				     struct cs_etm_auxtrace *etm)
+{
+	int cpu_idx;
+
+	for (cpu_idx = 0; cpu_idx < etm->num_cpu; cpu_idx++) {
+		u64 magic = etm->metadata[cpu_idx][CS_ETM_MAGIC];
+
+		if (magic == __perf_cs_etmv3_magic) {
+			u32 etmidr = etm->metadata[cpu_idx][CS_ETM_ETMIDR];
+
+			cs_etm__set_trace_param_etmv3(t_params, etm, cpu_idx,
+						      etmidr);
+		} else if (magic == __perf_cs_etmv4_magic) {
+			cs_etm__set_trace_param_etmv4(t_params, etm, cpu_idx);
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Initialise @d_params for a decoder working in @mode on behalf of @etmq:
+ * formatted, frame-aligned input without fsyncs/hsyncs, using
+ * cs_etm__packet_dump as the packet printer.
+ *
+ * Returns 0 on success, -EINVAL (leaving @d_params untouched) when @mode is
+ * not below CS_ETM_OPERATION_MAX.
+ */
+static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
+				       struct cs_etm_queue *etmq,
+				       enum cs_etm_decoder_operation mode)
+{
+	if (mode >= CS_ETM_OPERATION_MAX)
+		return -EINVAL;
+
+	/* who the decoder works for and what it should do */
+	d_params->data = etmq;
+	d_params->operation = mode;
+	d_params->packet_printer = cs_etm__packet_dump;
+
+	/* input framing */
+	d_params->formatted = true;
+	d_params->frame_aligned = true;
+	d_params->fsyncs = false;
+	d_params->hsyncs = false;
+
+	return 0;
+}
+
static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
struct auxtrace_buffer *buffer)
{
- int i, ret;
+ int ret;
const char *color = PERF_COLOR_BLUE;
struct cs_etm_decoder_params d_params;
struct cs_etm_trace_params *t_params;
@@ -121,32 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
/* Use metadata to fill in trace parameters for trace decoder */
t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
- for (i = 0; i < etm->num_cpu; i++) {
- t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
- t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
- t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
- t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
- t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
- t_params[i].etmv4.reg_configr =
- etm->metadata[i][CS_ETMV4_TRCCONFIGR];
- t_params[i].etmv4.reg_traceidr =
- etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
- }
+
+ if (!t_params)
+ return;
+
+ if (cs_etm__init_trace_params(t_params, etm))
+ goto out_free;
/* Set decoder parameters to simply print the trace packets */
- d_params.packet_printer = cs_etm__packet_dump;
- d_params.operation = CS_ETM_OPERATION_PRINT;
- d_params.formatted = true;
- d_params.fsyncs = false;
- d_params.hsyncs = false;
- d_params.frame_aligned = true;
+ if (cs_etm__init_decoder_params(&d_params, NULL,
+ CS_ETM_OPERATION_PRINT))
+ goto out_free;
decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
- zfree(&t_params);
-
if (!decoder)
- return;
+ goto out_free;
do {
size_t consumed;
@@ -161,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
} while (buffer_used < buffer->size);
cs_etm_decoder__free(decoder);
+
+out_free:
+ zfree(&t_params);
}
static int cs_etm__flush_events(struct perf_session *session,
@@ -184,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session,
if (ret < 0)
return ret;
- return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
+ return cs_etm__process_timeless_queues(etm, -1);
}
static void cs_etm__free_queue(void *priv)
@@ -230,7 +330,7 @@ static void cs_etm__free(struct perf_session *session)
cs_etm__free_events(session);
session->auxtrace = NULL;
- /* First remove all traceID/CPU# nodes for the RB tree */
+ /* First remove all traceID/metadata nodes for the RB tree */
intlist__for_each_entry_safe(inode, tmp, traceid_list)
intlist__remove(traceid_list, inode);
/* Then the RB tree itself */
@@ -244,6 +344,27 @@ static void cs_etm__free(struct perf_session *session)
zfree(&aux);
}
+/*
+ * Pick the PERF_RECORD_MISC_* cpumode for @address.
+ *
+ * Addresses at or above etm->kernel_start are kernel addresses: KERNEL on
+ * the host machine, GUEST_KERNEL otherwise. Lower addresses are USER on the
+ * host machine, GUEST_USER when perf_guest is set, HYPERVISOR otherwise.
+ */
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+	struct machine *machine = etmq->etm->machine;
+	const int in_kernel = address >= etmq->etm->kernel_start;
+
+	if (machine__is_host(machine))
+		return in_kernel ? PERF_RECORD_MISC_KERNEL
+				 : PERF_RECORD_MISC_USER;
+
+	if (in_kernel)
+		return PERF_RECORD_MISC_GUEST_KERNEL;
+
+	if (perf_guest)
+		return PERF_RECORD_MISC_GUEST_USER;
+
+	return PERF_RECORD_MISC_HYPERVISOR;
+}
+
static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
size_t size, u8 *buffer)
{
@@ -255,18 +376,15 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
struct addr_location al;
if (!etmq)
- return -1;
+ return 0;
machine = etmq->etm->machine;
- if (address >= etmq->etm->kernel_start)
- cpumode = PERF_RECORD_MISC_KERNEL;
- else
- cpumode = PERF_RECORD_MISC_USER;
+ cpumode = cs_etm__cpu_mode(etmq, address);
thread = etmq->thread;
if (!thread) {
if (cpumode != PERF_RECORD_MISC_KERNEL)
- return -EINVAL;
+ return 0;
thread = etmq->etm->unknown_thread;
}
@@ -289,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
return len;
}
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
- unsigned int queue_nr)
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
{
- int i;
struct cs_etm_decoder_params d_params;
- struct cs_etm_trace_params *t_params;
+ struct cs_etm_trace_params *t_params = NULL;
struct cs_etm_queue *etmq;
size_t szp = sizeof(struct cs_etm_packet);
@@ -329,43 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
if (!etmq->event_buf)
goto out_free;
- etmq->etm = etm;
- etmq->queue_nr = queue_nr;
- etmq->pid = -1;
- etmq->tid = -1;
- etmq->cpu = -1;
-
/* Use metadata to fill in trace parameters for trace decoder */
t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
if (!t_params)
goto out_free;
- for (i = 0; i < etm->num_cpu; i++) {
- t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
- t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
- t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
- t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
- t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
- t_params[i].etmv4.reg_configr =
- etm->metadata[i][CS_ETMV4_TRCCONFIGR];
- t_params[i].etmv4.reg_traceidr =
- etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
- }
+ if (cs_etm__init_trace_params(t_params, etm))
+ goto out_free;
- /* Set decoder parameters to simply print the trace packets */
- d_params.packet_printer = cs_etm__packet_dump;
- d_params.operation = CS_ETM_OPERATION_DECODE;
- d_params.formatted = true;
- d_params.fsyncs = false;
- d_params.hsyncs = false;
- d_params.frame_aligned = true;
- d_params.data = etmq;
+ /* Set decoder parameters to decode trace packets */
+ if (cs_etm__init_decoder_params(&d_params, etmq,
+ CS_ETM_OPERATION_DECODE))
+ goto out_free;
etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
- zfree(&t_params);
-
if (!etmq->decoder)
goto out_free;
@@ -378,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
cs_etm__mem_access))
goto out_free_decoder;
- etmq->offset = 0;
- etmq->period_instructions = 0;
-
+ zfree(&t_params);
return etmq;
out_free_decoder:
cs_etm_decoder__free(etmq->decoder);
out_free:
+ zfree(&t_params);
zfree(&etmq->event_buf);
zfree(&etmq->last_branch);
zfree(&etmq->last_branch_rb);
@@ -400,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
unsigned int queue_nr)
{
+ int ret = 0;
struct cs_etm_queue *etmq = queue->priv;
if (list_empty(&queue->head) || etmq)
- return 0;
+ goto out;
- etmq = cs_etm__alloc_queue(etm, queue_nr);
+ etmq = cs_etm__alloc_queue(etm);
- if (!etmq)
- return -ENOMEM;
+ if (!etmq) {
+ ret = -ENOMEM;
+ goto out;
+ }
queue->priv = etmq;
-
- if (queue->cpu != -1)
- etmq->cpu = queue->cpu;
-
+ etmq->etm = etm;
+ etmq->queue_nr = queue_nr;
+ etmq->cpu = queue->cpu;
etmq->tid = queue->tid;
+ etmq->pid = -1;
+ etmq->offset = 0;
+ etmq->period_instructions = 0;
- return 0;
+out:
+ return ret;
}
static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
@@ -425,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
unsigned int i;
int ret;
+ if (!etm->kernel_start)
+ etm->kernel_start = machine__kernel_start(etm->machine);
+
for (i = 0; i < etm->queues.nr_queues; i++) {
ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
if (ret)
@@ -492,53 +595,54 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
etmq->last_branch_rb->nr = 0;
}
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
- /* Returns 0 for the CS_ETM_TRACE_ON packet */
- if (packet->sample_type == CS_ETM_TRACE_ON)
- return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+ u64 addr) {
+ u8 instrBytes[2];
+ cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
/*
- * The packet records the execution range with an exclusive end address
- *
- * A64 instructions are constant size, so the last executed
- * instruction is A64_INSTR_SIZE before the end address
- * Will need to do instruction level decode for T32 instructions as
- * they can be variable size (not yet supported).
+ * T32 instruction size is indicated by bits[15:11] of the first
+ * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+ * denote a 32-bit instruction.
*/
- return packet->end_addr - A64_INSTR_SIZE;
+ return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}
static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{
- /* Returns 0 for the CS_ETM_TRACE_ON packet */
- if (packet->sample_type == CS_ETM_TRACE_ON)
+ /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY)
return 0;
return packet->start_addr;
}
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
{
- /*
- * Only A64 instructions are currently supported, so can get
- * instruction count by dividing.
- * Will need to do instruction level decode for T32 instructions as
- * they can be variable size (not yet supported).
- */
- return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+ /* Returns 0 for the CS_ETM_DISCONTINUITY packet */
+ if (packet->sample_type == CS_ETM_DISCONTINUITY)
+ return 0;
+
+ return packet->end_addr - packet->last_instr_size;
}
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+ const struct cs_etm_packet *packet,
u64 offset)
{
- /*
- * Only A64 instructions are currently supported, so can get
- * instruction address by muliplying.
- * Will need to do instruction level decode for T32 instructions as
- * they can be variable size (not yet supported).
- */
- return packet->start_addr + offset * A64_INSTR_SIZE;
+ if (packet->isa == CS_ETM_ISA_T32) {
+ u64 addr = packet->start_addr;
+
+ while (offset > 0) {
+ addr += cs_etm__t32_instr_size(etmq, addr);
+ offset--;
+ }
+ return addr;
+ }
+
+ /* Assume a 4 byte instruction size (A32/A64) */
+ return packet->start_addr + offset * 4;
}
static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -581,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event,
static int
-cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
+cs_etm__get_trace(struct cs_etm_queue *etmq)
{
struct auxtrace_buffer *aux_buffer = etmq->buffer;
struct auxtrace_buffer *old_buffer = aux_buffer;
@@ -595,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
if (!aux_buffer) {
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
- buff->len = 0;
+ etmq->buf_len = 0;
return 0;
}
@@ -615,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
- buff->offset = aux_buffer->offset;
- buff->len = aux_buffer->size;
- buff->buf = aux_buffer->data;
-
- buff->ref_timestamp = aux_buffer->reference;
+ etmq->buf_used = 0;
+ etmq->buf_len = aux_buffer->size;
+ etmq->buf = aux_buffer->data;
- return buff->len;
+ return etmq->buf_len;
}
static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
@@ -653,7 +755,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
struct perf_sample sample = {.ip = 0,};
event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
event->sample.header.size = sizeof(struct perf_event_header);
sample.ip = addr;
@@ -663,9 +765,9 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
sample.stream_id = etmq->etm->instructions_id;
sample.period = period;
sample.cpu = etmq->packet->cpu;
- sample.flags = 0;
+ sample.flags = etmq->prev_packet->flags;
sample.insn_len = 1;
- sample.cpumode = event->header.misc;
+ sample.cpumode = event->sample.header.misc;
if (etm->synth_opts.last_branch) {
cs_etm__copy_last_branch_rb(etmq);
@@ -706,12 +808,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
u64 nr;
struct branch_entry entries;
} dummy_bs;
+ u64 ip;
+
+ ip = cs_etm__last_executed_instr(etmq->prev_packet);
event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
event->sample.header.size = sizeof(struct perf_event_header);
- sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
+ sample.ip = ip;
sample.pid = etmq->pid;
sample.tid = etmq->tid;
sample.addr = cs_etm__first_executed_instr(etmq->packet);
@@ -719,8 +824,8 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
sample.stream_id = etmq->etm->branches_id;
sample.period = 1;
sample.cpu = etmq->packet->cpu;
- sample.flags = 0;
- sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.flags = etmq->prev_packet->flags;
+ sample.cpumode = event->sample.header.misc;
/*
* perf report cannot handle events without a branch stack
@@ -867,9 +972,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
struct cs_etm_auxtrace *etm = etmq->etm;
struct cs_etm_packet *tmp;
int ret;
- u64 instrs_executed;
+ u64 instrs_executed = etmq->packet->instr_count;
- instrs_executed = cs_etm__instr_count(etmq->packet);
etmq->period_instructions += instrs_executed;
/*
@@ -899,7 +1003,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
* executed, but PC has not advanced to next instruction)
*/
u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+ u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
ret = cs_etm__synth_instruction_sample(
etmq, addr, etm->instructions_sample_period);
@@ -914,7 +1018,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
bool generate_sample = false;
/* Generate sample for tracing on packet */
- if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON)
+ if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
generate_sample = true;
/* Generate sample for branch taken packet */
@@ -942,6 +1046,25 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
return 0;
}
+/*
+ * cs_etm__exception: handle an exception or exception-return packet.
+ *
+ * These packets carry dummy address values only, so PACKET is deliberately
+ * not swapped with PREV_PACKET here; PREV_PACKET stays usable for generating
+ * instruction and branch samples.
+ *
+ * Always returns 0.
+ */
+static int cs_etm__exception(struct cs_etm_queue *etmq)
+{
+	struct cs_etm_packet *prev = etmq->prev_packet;
+
+	/* Nothing to fix up unless the previous packet is an instruction range. */
+	if (prev->sample_type != CS_ETM_RANGE)
+		return 0;
+
+	/*
+	 * Whether or not the range really ended in a taken branch, force the
+	 * marker on so that a branch sample is generated for the range that
+	 * ran before the exception was taken or returned from.
+	 */
+	prev->last_instr_taken_branch = true;
+	return 0;
+}
+
static int cs_etm__flush(struct cs_etm_queue *etmq)
{
int err = 0;
@@ -984,7 +1107,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq)
}
swap_packet:
- if (etmq->etm->synth_opts.last_branch) {
+ if (etm->sample_branches || etm->synth_opts.last_branch) {
/*
* Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
* the next incoming packet.
@@ -997,96 +1120,532 @@ swap_packet:
return err;
}
+/*
+ * cs_etm__end_block: wrap up when no new packet is coming.
+ *
+ * At this point 'etmq->packet' still holds the stale packet left over from
+ * the last packet swap, so no branch sample is generated from it. The only
+ * work done is to flush the branch stack: when last-branch synthesis is
+ * enabled and the previous packet is an instruction range, one instruction
+ * sample is emitted for the branches still held in the circular buffer.
+ *
+ * Returns 0 on success or the error from cs_etm__synth_instruction_sample().
+ */
+static int cs_etm__end_block(struct cs_etm_queue *etmq)
+{
+	u64 addr;
+	int err;
+
+	if (!etmq->etm->synth_opts.last_branch)
+		return 0;
+
+	if (etmq->prev_packet->sample_type != CS_ETM_RANGE)
+		return 0;
+
+	/* Sample at the end of the last reported execution range. */
+	addr = cs_etm__last_executed_instr(etmq->prev_packet);
+
+	err = cs_etm__synth_instruction_sample(etmq, addr,
+					       etmq->period_instructions);
+	if (err)
+		return err;
+
+	etmq->period_instructions = 0;
+	return 0;
+}
+/*
+ * cs_etm__get_data_block: make sure the queue has trace data to decode,
+ * pulling the next auxtrace_buffer only when the current one is used up.
+ *
+ * Returns: < 0 on error
+ *          = 0 when there is no more auxtrace_buffer to read
+ *          > 0 when the current buffer still holds data
+ */
+static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
+{
+	int ret;
+
+	/* Data left over from the current buffer: keep using it. */
+	if (etmq->buf_len)
+		return etmq->buf_len;
+
+	ret = cs_etm__get_trace(etmq);
+	if (ret <= 0)
+		return ret;
+
+	/*
+	 * Consecutive blocks in the data file are not guaranteed to be
+	 * contiguous, so reset the decoder to force a re-sync.
+	 */
+	ret = cs_etm_decoder__reset(etmq->decoder);
+	if (ret)
+		return ret;
+
+	return etmq->buf_len;
+}
+
+/*
+ * cs_etm__is_svc_instr: tell whether the instruction ending at @end_addr is
+ * an SVC, using the encoding that matches packet->isa.
+ *
+ * @etmq:     queue handed to cs_etm__mem_access() to read the instruction
+ * @packet:   only its 'isa' field is consulted
+ * @end_addr: address just past the instruction to inspect
+ *
+ * Returns true when the bytes read match an SVC encoding, false otherwise
+ * (including for an unknown ISA).
+ */
+static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
+				 struct cs_etm_packet *packet,
+				 u64 end_addr)
+{
+	/*
+	 * Start from zero: the result of cs_etm__mem_access() is not checked
+	 * here, so if the read leaves the buffer untouched the comparisons
+	 * below must not operate on an indeterminate value. An all-zero word
+	 * matches none of the SVC patterns.
+	 */
+	u16 instr16 = 0;
+	u32 instr32 = 0;
+	u64 addr;
+
+	switch (packet->isa) {
+	case CS_ETM_ISA_T32:
+		/*
+		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
+		 *
+		 *  b'15         b'8
+		 * +-----------------+--------+
+		 * | 1 1 0 1 1 1 1 1 |  imm8  |
+		 * +-----------------+--------+
+		 *
+		 * According to the specification, SVC for T32 is defined
+		 * only as a 16-bit instruction and has no 32-bit form, so
+		 * only 2 bytes are read as the instruction for T32.
+		 */
+		addr = end_addr - 2;
+		cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16);
+		if ((instr16 & 0xFF00) == 0xDF00)
+			return true;
+
+		break;
+	case CS_ETM_ISA_A32:
+		/*
+		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
+		 *
+		 *  b'31 b'28 b'27 b'24
+		 * +---------+---------+-------------------------+
+		 * |  !1111  | 1 1 1 1 |        imm24            |
+		 * +---------+---------+-------------------------+
+		 */
+		addr = end_addr - 4;
+		cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
+		if ((instr32 & 0x0F000000) == 0x0F000000 &&
+		    (instr32 & 0xF0000000) != 0xF0000000)
+			return true;
+
+		break;
+	case CS_ETM_ISA_A64:
+		/*
+		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
+		 *
+		 *  b'31               b'21           b'4     b'0
+		 * +-----------------------+---------+-----------+
+		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
+		 * +-----------------------+---------+-----------+
+		 */
+		addr = end_addr - 4;
+		cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
+		if ((instr32 & 0xFFE0001F) == 0xd4000001)
+			return true;
+
+		break;
+	case CS_ETM_ISA_UNKNOWN:
+	default:
+		break;
+	}
+
+	return false;
+}
+
+/*
+ * cs_etm__is_syscall: does the current exception packet stand for a
+ * system call?
+ *
+ * ETMv3 reports SVC with a dedicated exception number. ETMv4 folds SVC,
+ * SMC and HVC into CS_ETMV4_EXC_CALL, so there the instruction that ends
+ * the previous packet has to be inspected as well.
+ */
+static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
+{
+	struct cs_etm_packet *cur = etmq->packet;
+	struct cs_etm_packet *prev = etmq->prev_packet;
+
+	if (magic == __perf_cs_etmv3_magic &&
+	    cur->exception_number == CS_ETMV3_EXC_SVC)
+		return true;
+
+	if (magic != __perf_cs_etmv4_magic)
+		return false;
+
+	if (cur->exception_number != CS_ETMV4_EXC_CALL)
+		return false;
+
+	/* A CALL exception is a syscall only when it came from an SVC. */
+	return cs_etm__is_svc_instr(etmq, prev, prev->end_addr);
+}
+
+/*
+ * cs_etm__is_async_exception: does the current exception packet carry one
+ * of the exception numbers treated as asynchronous for the given trace
+ * protocol (identified by @magic)?
+ */
+static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
+{
+	struct cs_etm_packet *packet = etmq->packet;
+
+	if (magic == __perf_cs_etmv3_magic) {
+		switch (packet->exception_number) {
+		case CS_ETMV3_EXC_DEBUG_HALT:
+		case CS_ETMV3_EXC_ASYNC_DATA_ABORT:
+		case CS_ETMV3_EXC_PE_RESET:
+		case CS_ETMV3_EXC_IRQ:
+		case CS_ETMV3_EXC_FIQ:
+			return true;
+		default:
+			break;
+		}
+	}
+
+	if (magic == __perf_cs_etmv4_magic) {
+		switch (packet->exception_number) {
+		case CS_ETMV4_EXC_RESET:
+		case CS_ETMV4_EXC_DEBUG_HALT:
+		case CS_ETMV4_EXC_SYSTEM_ERROR:
+		case CS_ETMV4_EXC_INST_DEBUG:
+		case CS_ETMV4_EXC_DATA_DEBUG:
+		case CS_ETMV4_EXC_IRQ:
+		case CS_ETMV4_EXC_FIQ:
+			return true;
+		default:
+			break;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * cs_etm__is_sync_exception: does the current exception packet carry one
+ * of the exception numbers treated as synchronous for the given trace
+ * protocol (identified by @magic)?
+ */
+static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
+{
+	struct cs_etm_packet *packet = etmq->packet;
+	struct cs_etm_packet *prev_packet = etmq->prev_packet;
+
+	if (magic == __perf_cs_etmv3_magic) {
+		switch (packet->exception_number) {
+		case CS_ETMV3_EXC_SMC:
+		case CS_ETMV3_EXC_HYP:
+		case CS_ETMV3_EXC_JAZELLE_THUMBEE:
+		case CS_ETMV3_EXC_UNDEFINED_INSTR:
+		case CS_ETMV3_EXC_PREFETCH_ABORT:
+		case CS_ETMV3_EXC_DATA_FAULT:
+		case CS_ETMV3_EXC_GENERIC:
+			return true;
+		default:
+			break;
+		}
+	}
+
+	if (magic != __perf_cs_etmv4_magic)
+		return false;
+
+	switch (packet->exception_number) {
+	case CS_ETMV4_EXC_TRAP:
+	case CS_ETMV4_EXC_ALIGNMENT:
+	case CS_ETMV4_EXC_INST_FAULT:
+	case CS_ETMV4_EXC_DATA_FAULT:
+		return true;
+	case CS_ETMV4_EXC_CALL:
+		/*
+		 * CS_ETMV4_EXC_CALL covers SVC, SMC and HVC; everything
+		 * but SVC is taken as a sync exception.
+		 */
+		return !cs_etm__is_svc_instr(etmq, prev_packet,
+					     prev_packet->end_addr);
+	default:
+		break;
+	}
+
+	/*
+	 * ETMv4 has 5 bits for the exception number; numbers in the range
+	 * ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] are implementation defined
+	 * exceptions and are simply taken as sync exceptions.
+	 */
+	return packet->exception_number > CS_ETMV4_EXC_FIQ &&
+	       packet->exception_number <= CS_ETMV4_EXC_END;
+}
+
+/*
+ * cs_etm__set_sample_flags: derive the PERF_IP_FLAG_* sample flags for the
+ * current packet and, where the current packet qualifies the previous one,
+ * adjust the flags of the previous packet as well.
+ *
+ * Returns 0 on success, or the error returned by cs_etm__get_magic() when
+ * handling an exception packet.
+ */
+static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
+{
+ struct cs_etm_packet *packet = etmq->packet;
+ struct cs_etm_packet *prev_packet = etmq->prev_packet;
+ u64 magic;
+ int ret;
+
+ switch (packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * Immediate branch instruction with neither link nor
+ * return flag, it's normal branch instruction within
+ * the function.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR &&
+ packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
+ packet->flags = PERF_IP_FLAG_BRANCH;
+
+ if (packet->last_instr_cond)
+ packet->flags |= PERF_IP_FLAG_CONDITIONAL;
+ }
+
+ /*
+ * Immediate branch instruction with link (e.g. BL), this is
+ * branch instruction for function call.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR &&
+ packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL;
+
+ /*
+ * Indirect branch instruction with link (e.g. BLR), this is
+ * branch instruction for function call.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL;
+
+ /*
+ * Indirect branch instruction with subtype of
+ * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
+ * function return for A32/T32.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN;
+
+ /*
+ * Indirect branch instruction without link (e.g. BR), usually
+ * this is used for function return, especially for functions
+ * within dynamic link lib.
+ */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_NONE)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN;
+
+ /* Return instruction for function return. */
+ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
+ packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN;
+
+ /*
+ * Decoder might insert a discontinuity in the middle of
+ * instruction packets, fixup prev_packet with flag
+ * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
+ */
+ if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
+ prev_packet->flags |= PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ /*
+ * If the previous packet is an exception return packet
+ * and the return address just follows SVC instruction,
+ * it needs to calibrate the previous packet sample flags
+ * as PERF_IP_FLAG_SYSCALLRET.
+ */
+ if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT) &&
+ cs_etm__is_svc_instr(etmq, packet, packet->start_addr))
+ prev_packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_SYSCALLRET;
+ break;
+ case CS_ETM_DISCONTINUITY:
+ /*
+ * The trace is discontinuous, if the previous packet is
+ * instruction packet, set flag PERF_IP_FLAG_TRACE_END
+ * for previous packet.
+ */
+ if (prev_packet->sample_type == CS_ETM_RANGE)
+ prev_packet->flags |= PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ break;
+ case CS_ETM_EXCEPTION:
+ ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+ if (ret)
+ return ret;
+
+ /* The exception is for system call. */
+ if (cs_etm__is_syscall(etmq, magic))
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET;
+ /*
+ * The exceptions are triggered by external signals from bus,
+ * interrupt controller, debug module, PE reset or halt.
+ */
+ else if (cs_etm__is_async_exception(etmq, magic))
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ /*
+ * Otherwise, exception is caused by trap, instruction &
+ * data fault, or alignment errors.
+ */
+ else if (cs_etm__is_sync_exception(etmq, magic))
+ packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+
+ /*
+ * The exception packet is not used standalone for generating
+ * samples; it is affiliated to the previous instruction range
+ * packet. So set the previous range packet flags to tell perf
+ * it is an exception taken branch.
+ */
+ if (prev_packet->sample_type == CS_ETM_RANGE)
+ prev_packet->flags = packet->flags;
+ break;
+ case CS_ETM_EXCEPTION_RET:
+ /*
+ * The exception return packet is not used standalone for
+ * generating samples; it is affiliated to the previous
+ * instruction range packet. So set the previous range packet
+ * flags to tell perf it is an exception return branch.
+ *
+ * The exception return can be for either system call or
+ * other exception types; unfortunately the packet doesn't
+ * contain exception type related info so we cannot decide
+ * the exception type purely based on exception return packet.
+ * Recording the exception number from the exception packet
+ * and reusing it for the exception return packet is not
+ * reliable, because the trace can be discontinuous or the
+ * interrupt can be nested, thus the recorded exception number
+ * cannot be used for exception return packet for these two
+ * cases.
+ *
+ * For exception return packet, we only need to distinguish the
+ * packet is for system call or for other types. Thus the
+ * decision can be deferred until the next packet is received,
+ * which contains the return address; based on the return
+ * address we can read out the previous instruction and check
+ * if it's a system call instruction and then calibrate the
+ * sample flag as needed.
+ */
+ if (prev_packet->sample_type == CS_ETM_RANGE)
+ prev_packet->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT;
+ break;
+ case CS_ETM_EMPTY:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * cs_etm__decode_data_block: feed the unread part of the current buffer to
+ * the decoder and advance the queue's bookkeeping by what was consumed.
+ *
+ * Packets are decoded and added to the decoder's packet queue until the
+ * decoder packet processing callback asks for processing to stop or the
+ * buffer is exhausted. Normal reasons to stop are a timestamp packet or a
+ * full decoder buffer queue.
+ *
+ * Returns 0 on success, otherwise the decoder's error code; nothing is
+ * advanced on error.
+ */
+static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
+{
+	size_t consumed = 0;
+	int err;
+
+	err = cs_etm_decoder__process_data_block(etmq->decoder,
+						 etmq->offset,
+						 &etmq->buf[etmq->buf_used],
+						 etmq->buf_len,
+						 &consumed);
+	if (err)
+		return err;
+
+	etmq->offset += consumed;
+	etmq->buf_used += consumed;
+	etmq->buf_len -= consumed;
+
+	return 0;
+}
+
+/*
+ * cs_etm__process_decoder_queue: drain the decoder's packet queue, handling
+ * each packet according to its sample type.
+ *
+ * Returns the value from cs_etm_decoder__get_packet() once it reports end
+ * of data or an error (<= 0), a negative value from
+ * cs_etm__set_sample_flags(), or -EINVAL when an empty packet shows up.
+ * The results of the per-packet handlers are not examined.
+ */
+static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
+{
+	int ret;
+
+	/* Process each packet in this chunk */
+	for (;;) {
+		ret = cs_etm_decoder__get_packet(etmq->decoder, etmq->packet);
+		/* Stop processing this chunk on end of data or error */
+		if (ret <= 0)
+			return ret;
+
+		/*
+		 * The handlers below swap PACKET and PREV_PACKET, so the
+		 * sample flags have to be worked out first, while the
+		 * address information is still in its pre-swap place.
+		 */
+		ret = cs_etm__set_sample_flags(etmq);
+		if (ret < 0)
+			return ret;
+
+		switch (etmq->packet->sample_type) {
+		case CS_ETM_EMPTY:
+			/* Should not receive empty packet, report error. */
+			pr_err("CS ETM Trace: empty packet\n");
+			return -EINVAL;
+		case CS_ETM_RANGE:
+			/*
+			 * The packet holds an instruction range: generate
+			 * instruction sequence events.
+			 */
+			cs_etm__sample(etmq);
+			break;
+		case CS_ETM_EXCEPTION:
+		case CS_ETM_EXCEPTION_RET:
+			/*
+			 * An exception packet arrived: make sure the
+			 * previous instruction range packet is handled
+			 * properly.
+			 */
+			cs_etm__exception(etmq);
+			break;
+		case CS_ETM_DISCONTINUITY:
+			/* Discontinuity in trace, flush previous branch stack */
+			cs_etm__flush(etmq);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
{
- struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_buffer buffer;
- size_t buffer_used, processed;
int err = 0;
- if (!etm->kernel_start)
- etm->kernel_start = machine__kernel_start(etm->machine);
-
/* Go through each buffer in the queue and decode them one by one */
while (1) {
- buffer_used = 0;
- memset(&buffer, 0, sizeof(buffer));
- err = cs_etm__get_trace(&buffer, etmq);
+ err = cs_etm__get_data_block(etmq);
if (err <= 0)
return err;
- /*
- * We cannot assume consecutive blocks in the data file are
- * contiguous, reset the decoder to force re-sync.
- */
- err = cs_etm_decoder__reset(etmq->decoder);
- if (err != 0)
- return err;
/* Run trace decoder until buffer consumed or end of trace */
do {
- processed = 0;
- err = cs_etm_decoder__process_data_block(
- etmq->decoder,
- etmq->offset,
- &buffer.buf[buffer_used],
- buffer.len - buffer_used,
- &processed);
+ err = cs_etm__decode_data_block(etmq);
if (err)
return err;
- etmq->offset += processed;
- buffer_used += processed;
-
- /* Process each packet in this chunk */
- while (1) {
- err = cs_etm_decoder__get_packet(etmq->decoder,
- etmq->packet);
- if (err <= 0)
- /*
- * Stop processing this chunk on
- * end of data or error
- */
- break;
-
- switch (etmq->packet->sample_type) {
- case CS_ETM_RANGE:
- /*
- * If the packet contains an instruction
- * range, generate instruction sequence
- * events.
- */
- cs_etm__sample(etmq);
- break;
- case CS_ETM_TRACE_ON:
- /*
- * Discontinuity in trace, flush
- * previous branch stack
- */
- cs_etm__flush(etmq);
- break;
- case CS_ETM_EMPTY:
- /*
- * Should not receive empty packet,
- * report error.
- */
- pr_err("CS ETM Trace: empty packet\n");
- return -EINVAL;
- default:
- break;
- }
- }
- } while (buffer.len > buffer_used);
+ /*
+ * Process each packet in this chunk, nothing to do if
+ * an error occurs other than hoping the next one will
+ * be better.
+ */
+ err = cs_etm__process_decoder_queue(etmq);
+
+ } while (etmq->buf_len);
if (err == 0)
/* Flush any remaining branch stack entries */
- err = cs_etm__flush(etmq);
+ err = cs_etm__end_block(etmq);
}
return err;
}
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
- pid_t tid, u64 time_)
+ pid_t tid)
{
unsigned int i;
struct auxtrace_queues *queues = &etm->queues;
@@ -1096,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
struct cs_etm_queue *etmq = queue->priv;
if (etmq && ((tid == -1) || (etmq->tid == tid))) {
- etmq->time = time_;
cs_etm__set_pid_tid_cpu(etm, queue);
cs_etm__run_decoder(etmq);
}
@@ -1140,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session,
if (event->header.type == PERF_RECORD_EXIT)
return cs_etm__process_timeless_queues(etm,
- event->fork.tid,
- sample->time);
+ event->fork.tid);
return 0;
}
@@ -1295,9 +1852,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
0xffffffff);
/*
- * Create an RB tree for traceID-CPU# tuple. Since the conversion has
- * to be made for each packet that gets decoded, optimizing access in
- * anything other than a sequential array is worth doing.
+ * Create an RB tree for traceID-metadata tuple. Since the conversion
+ * has to be made for each packet that gets decoded, optimizing access
+ * in anything other than a sequential array is worth doing.
*/
traceid_list = intlist__new(NULL);
if (!traceid_list) {
@@ -1363,8 +1920,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
err = -EINVAL;
goto err_free_metadata;
}
- /* All good, associate the traceID with the CPU# */
- inode->priv = &metadata[j][CS_ETM_CPU];
+ /* All good, associate the traceID with the metadata pointer */
+ inode->priv = metadata[j];
}
/*
@@ -1432,7 +1989,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
etm->synth_opts = *session->itrace_synth_opts;
} else {
- itrace_synth_opts__set_default(&etm->synth_opts);
+ itrace_synth_opts__set_default(&etm->synth_opts,
+ session->itrace_synth_opts->default_no_sample);
etm->synth_opts.callchain = false;
}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 37f8d48179ca..0e97c196147a 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -53,7 +53,51 @@ enum {
CS_ETMV4_PRIV_MAX,
};
-/* RB tree for quick conversion between traceID and CPUs */
+/*
+ * ETMv3 exception encoding number:
+ * See Embedded Trace Macrocell specification (ARM IHI 0014Q)
+ * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
+ */
+enum {
+ CS_ETMV3_EXC_NONE = 0,
+ CS_ETMV3_EXC_DEBUG_HALT = 1,
+ CS_ETMV3_EXC_SMC = 2,
+ CS_ETMV3_EXC_HYP = 3,
+ CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4,
+ CS_ETMV3_EXC_JAZELLE_THUMBEE = 5,
+ CS_ETMV3_EXC_PE_RESET = 8,
+ CS_ETMV3_EXC_UNDEFINED_INSTR = 9,
+ CS_ETMV3_EXC_SVC = 10,
+ CS_ETMV3_EXC_PREFETCH_ABORT = 11,
+ CS_ETMV3_EXC_DATA_FAULT = 12,
+ CS_ETMV3_EXC_GENERIC = 13,
+ CS_ETMV3_EXC_IRQ = 14,
+ CS_ETMV3_EXC_FIQ = 15,
+};
+
+/*
+ * ETMv4 exception encoding number:
+ * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D)
+ * table 6-12 Possible values for the TYPE field in an Exception instruction
+ * trace packet, for ARMv7-A/R and ARMv8-A/R PEs.
+ *
+ * Only the values named below get a constant here; CS_ETMV4_EXC_END (31) is
+ * the highest value listed.
+ */
+enum {
+ CS_ETMV4_EXC_RESET = 0,
+ CS_ETMV4_EXC_DEBUG_HALT = 1,
+ CS_ETMV4_EXC_CALL = 2,
+ CS_ETMV4_EXC_TRAP = 3,
+ CS_ETMV4_EXC_SYSTEM_ERROR = 4,
+ CS_ETMV4_EXC_INST_DEBUG = 6,
+ CS_ETMV4_EXC_DATA_DEBUG = 7,
+ CS_ETMV4_EXC_ALIGNMENT = 10,
+ CS_ETMV4_EXC_INST_FAULT = 11,
+ CS_ETMV4_EXC_DATA_FAULT = 12,
+ CS_ETMV4_EXC_IRQ = 14,
+ CS_ETMV4_EXC_FIQ = 15,
+ CS_ETMV4_EXC_END = 31,
+};
+
+/* RB tree for quick conversion between traceID and metadata pointers */
struct intlist *traceid_list;
#define KiB(x) ((x) * 1024)
@@ -61,14 +105,15 @@ struct intlist *traceid_list;
#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
-static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL;
-static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL;
+#define __perf_cs_etmv3_magic 0x3030303030303030ULL
+#define __perf_cs_etmv4_magic 0x4040404040404040ULL
#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
#ifdef HAVE_CSTRACE_SUPPORT
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
#else
static inline int
cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
@@ -76,6 +121,12 @@ cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
{
return -1;
}
+
+/* Stub for builds without HAVE_CSTRACE_SUPPORT: always returns -1. */
+static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
+ int *cpu __maybe_unused)
+{
+ return -1;
+}
#endif
#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 2a36fab76994..26af43ad9ddd 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
{
struct perf_session *session;
struct perf_data data = {
- .file = { .path = input, .fd = -1 },
+ .path = input,
.mode = PERF_DATA_MODE_READ,
.force = opts->force,
};
@@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
fprintf(stderr,
"[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
- data.file.path, path);
+ data.path, path);
fprintf(stderr,
"[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index d8cfc19ddb10..7bd5ddeb7a41 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -7,11 +7,117 @@
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
+#include <asm/bug.h>
+#include <sys/types.h>
+#include <dirent.h>
#include "data.h"
#include "util.h"
#include "debug.h"
+/*
+ * close_dir: close the descriptor and free the path of each of the first
+ * @nr entries of @files, then free the @files array itself.
+ */
+static void close_dir(struct perf_data_file *files, int nr)
+{
+	/* Go down to index 0 inclusive, otherwise the first entry leaks. */
+	while (--nr >= 0) {
+		close(files[nr].fd);
+		free(files[nr].path);
+	}
+	free(files);
+}
+
+/*
+ * perf_data__close_dir: release the per-directory files held in data->dir.
+ *
+ * The bookkeeping is reset afterwards so that no dangling pointer is left
+ * behind and a repeated call is harmless.
+ */
+void perf_data__close_dir(struct perf_data *data)
+{
+	close_dir(data->dir.files, data->dir.nr);
+	data->dir.files = NULL;
+	data->dir.nr = 0;
+}
+
+/*
+ * perf_data__create_dir: create and open @nr files named "<path>/data.<i>"
+ * (i = 0 .. nr-1) under data->path.
+ *
+ * On success the opened files are stored in data->dir and 0 is returned.
+ * On failure everything opened so far is released, data->dir is left
+ * untouched and a negative value is returned.
+ */
+int perf_data__create_dir(struct perf_data *data, int nr)
+{
+	struct perf_data_file *files = NULL;
+	int i, ret = -1;
+
+	files = zalloc(nr * sizeof(*files));
+	if (!files)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		struct perf_data_file *file = &files[i];
+
+		if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) {
+			/*
+			 * 'ret' may still hold the fd of the previous
+			 * iteration; make sure an error is reported.
+			 */
+			ret = -ENOMEM;
+			goto out_err;
+		}
+
+		ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
+		if (ret < 0) {
+			/* Entry i is not covered by close_dir(files, i). */
+			free(file->path);
+			goto out_err;
+		}
+
+		file->fd = ret;
+	}
+
+	/* Publish only once every file is open. */
+	data->dir.files = files;
+	data->dir.nr = nr;
+	return 0;
+
+out_err:
+	close_dir(files, i);
+	return ret;
+}
+
+/*
+ * perf_data__open_dir: open read-only every regular file in data->path whose
+ * name starts with "data" and record path, descriptor and size of each in
+ * data->dir.
+ *
+ * Returns 0 on success, -EINVAL when the directory cannot be opened or holds
+ * no matching file, and a negative value when an allocation or open() fails.
+ * On failure the files opened so far are released and data->dir is left
+ * untouched.
+ */
+int perf_data__open_dir(struct perf_data *data)
+{
+	struct perf_data_file *files = NULL;
+	struct dirent *dent;
+	int ret = -1;
+	DIR *dir;
+	int nr = 0;
+
+	dir = opendir(data->path);
+	if (!dir)
+		return -EINVAL;
+
+	while ((dent = readdir(dir)) != NULL) {
+		struct perf_data_file *file;
+		char path[PATH_MAX];
+		struct stat st;
+
+		snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name);
+		if (stat(path, &st))
+			continue;
+
+		if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data", 4))
+			continue;
+
+		ret = -ENOMEM;
+
+		file = realloc(files, (nr + 1) * sizeof(*files));
+		if (!file)
+			goto out_err;
+
+		files = file;
+		file = &files[nr];
+
+		file->path = strdup(path);
+		if (!file->path)
+			goto out_err;
+
+		ret = open(file->path, O_RDONLY);
+		if (ret < 0) {
+			free(file->path);
+			goto out_err;
+		}
+
+		file->fd = ret;
+		file->size = st.st_size;
+
+		/*
+		 * Count the entry only once it is fully set up, so the error
+		 * path never touches a slot with an uninitialised fd or path.
+		 */
+		nr++;
+	}
+
+	closedir(dir);
+
+	if (!files)
+		return -EINVAL;
+
+	data->dir.files = files;
+	data->dir.nr = nr;
+	return 0;
+
+out_err:
+	closedir(dir);
+	close_dir(files, nr);
+	return ret;
+}
+
static bool check_pipe(struct perf_data *data)
{
struct stat st;
@@ -19,11 +125,11 @@ static bool check_pipe(struct perf_data *data)
int fd = perf_data__is_read(data) ?
STDIN_FILENO : STDOUT_FILENO;
- if (!data->file.path) {
+ if (!data->path) {
if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
is_pipe = true;
} else {
- if (!strcmp(data->file.path, "-"))
+ if (!strcmp(data->path, "-"))
is_pipe = true;
}
@@ -37,13 +143,31 @@ static int check_backup(struct perf_data *data)
{
struct stat st;
- if (!stat(data->file.path, &st) && st.st_size) {
- /* TODO check errors properly */
+ if (perf_data__is_read(data))
+ return 0;
+
+ if (!stat(data->path, &st) && st.st_size) {
char oldname[PATH_MAX];
+ int ret;
+
snprintf(oldname, sizeof(oldname), "%s.old",
- data->file.path);
- unlink(oldname);
- rename(data->file.path, oldname);
+ data->path);
+
+ ret = rm_rf_perf_data(oldname);
+ if (ret) {
+ pr_err("Can't remove old data: %s (%s)\n",
+ ret == -2 ?
+ "Unknown file found" : strerror(errno),
+ oldname);
+ return -1;
+ }
+
+ if (rename(data->path, oldname)) {
+ pr_err("Can't move data: %s (%s to %s)\n",
+ strerror(errno),
+ data->path, oldname);
+ return -1;
+ }
}
return 0;
@@ -82,7 +206,7 @@ static int open_file_read(struct perf_data *data)
goto out_close;
}
- data->size = st.st_size;
+ data->file.size = st.st_size;
return fd;
out_close:
@@ -95,9 +219,6 @@ static int open_file_write(struct perf_data *data)
int fd;
char sbuf[STRERR_BUFSIZE];
- if (check_backup(data))
- return -1;
-
fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
S_IRUSR|S_IWUSR);
@@ -115,8 +236,22 @@ static int open_file(struct perf_data *data)
fd = perf_data__is_read(data) ?
open_file_read(data) : open_file_write(data);
+ if (fd < 0) {
+ free(data->file.path);
+ return -1;
+ }
+
data->file.fd = fd;
- return fd < 0 ? -1 : 0;
+ return 0;
+}
+
+static int open_file_dup(struct perf_data *data)
+{
+ data->file.path = strdup(data->path);
+ if (!data->file.path)
+ return -ENOMEM;
+
+ return open_file(data);
}
int perf_data__open(struct perf_data *data)
@@ -124,14 +259,18 @@ int perf_data__open(struct perf_data *data)
if (check_pipe(data))
return 0;
- if (!data->file.path)
- data->file.path = "perf.data";
+ if (!data->path)
+ data->path = "perf.data";
- return open_file(data);
+ if (check_backup(data))
+ return -1;
+
+ return open_file_dup(data);
}
void perf_data__close(struct perf_data *data)
{
+ free(data->file.path);
close(data->file.fd);
}
@@ -159,15 +298,15 @@ int perf_data__switch(struct perf_data *data,
if (perf_data__is_read(data))
return -EINVAL;
- if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0)
+ if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0)
return -ENOMEM;
/*
* Only fire a warning, don't return error, continue fill
* original file.
*/
- if (rename(data->file.path, new_filepath))
- pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath);
+ if (rename(data->path, new_filepath))
+ pr_warning("Failed to rename %s to %s\n", data->path, new_filepath);
if (!at_exit) {
close(data->file.fd);
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 4828f7feea89..14b47be2bd69 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -10,16 +10,22 @@ enum perf_data_mode {
};
struct perf_data_file {
- const char *path;
+ char *path;
int fd;
+ unsigned long size;
};
struct perf_data {
+ const char *path;
struct perf_data_file file;
bool is_pipe;
bool force;
- unsigned long size;
enum perf_data_mode mode;
+
+ struct {
+ struct perf_data_file *files;
+ int nr;
+ } dir;
};
static inline bool perf_data__is_read(struct perf_data *data)
@@ -44,7 +50,7 @@ static inline int perf_data__fd(struct perf_data *data)
static inline unsigned long perf_data__size(struct perf_data *data)
{
- return data->size;
+ return data->file.size;
}
int perf_data__open(struct perf_data *data);
@@ -63,4 +69,8 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
int perf_data__switch(struct perf_data *data,
const char *postfix,
size_t pos, bool at_exit);
+
+int perf_data__create_dir(struct perf_data *data, int nr);
+int perf_data__open_dir(struct perf_data *data);
+void perf_data__close_dir(struct perf_data *data);
#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 69fbb0a72d0c..de9b4769d06c 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -20,6 +20,7 @@
#include "thread.h"
#include "comm.h"
#include "symbol.h"
+#include "map.h"
#include "event.h"
#include "util.h"
#include "thread-stack.h"
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c
deleted file mode 100644
index eec754243f4d..000000000000
--- a/tools/perf/util/drv_configs.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * drv_configs.h: Interface to apply PMU specific configuration
- * Copyright (c) 2016-2018, Linaro Ltd.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- */
-
-#include "drv_configs.h"
-#include "evlist.h"
-#include "evsel.h"
-#include "pmu.h"
-#include <errno.h>
-
-static int
-perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
- struct perf_evsel_config_term **err_term)
-{
- bool found = false;
- int err = 0;
- struct perf_evsel_config_term *term;
- struct perf_pmu *pmu = NULL;
-
- while ((pmu = perf_pmu__scan(pmu)) != NULL)
- if (pmu->type == evsel->attr.type) {
- found = true;
- break;
- }
-
- list_for_each_entry(term, &evsel->config_terms, list) {
- if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
- continue;
-
- /*
- * We have a configuration term, report an error if we
- * can't find the PMU or if the PMU driver doesn't support
- * cmd line driver configuration.
- */
- if (!found || !pmu->set_drv_config) {
- err = -EINVAL;
- *err_term = term;
- break;
- }
-
- err = pmu->set_drv_config(term);
- if (err) {
- *err_term = term;
- break;
- }
- }
-
- return err;
-}
-
-int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
- struct perf_evsel **err_evsel,
- struct perf_evsel_config_term **err_term)
-{
- struct perf_evsel *evsel;
- int err = 0;
-
- evlist__for_each_entry(evlist, evsel) {
- err = perf_evsel__apply_drv_configs(evsel, err_term);
- if (err) {
- *err_evsel = evsel;
- break;
- }
- }
-
- return err;
-}
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h
deleted file mode 100644
index 32bc9babc2e0..000000000000
--- a/tools/perf/util/drv_configs.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * drv_configs.h: Interface to apply PMU specific configuration
- * Copyright (c) 2016-2018, Linaro Ltd.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- */
-
-#ifndef __PERF_DRV_CONFIGS_H
-#define __PERF_DRV_CONFIGS_H
-
-#include "drv_configs.h"
-#include "evlist.h"
-#include "evsel.h"
-
-int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
- struct perf_evsel **err_evsel,
- struct perf_evsel_config_term **term);
-#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index bbed90e5d9bb..ba58ba603b69 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -8,8 +8,11 @@
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
+#include <libgen.h>
#include "compress.h"
+#include "namespaces.h"
#include "path.h"
+#include "map.h"
#include "symbol.h"
#include "srcline.h"
#include "dso.h"
@@ -295,7 +298,7 @@ static int decompress_kmodule(struct dso *dso, const char *name,
unlink(tmpbuf);
if (pathname && (fd >= 0))
- strncpy(pathname, tmpbuf, len);
+ strlcpy(pathname, tmpbuf, len);
return fd;
}
@@ -894,7 +897,7 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine,
return r;
}
-static int data_file_size(struct dso *dso, struct machine *machine)
+int dso__data_file_size(struct dso *dso, struct machine *machine)
{
int ret = 0;
struct stat st;
@@ -943,7 +946,7 @@ out:
*/
off_t dso__data_size(struct dso *dso, struct machine *machine)
{
- if (data_file_size(dso, machine))
+ if (dso__data_file_size(dso, machine))
return -1;
/* For now just estimate dso data size is close to file size */
@@ -953,7 +956,7 @@ off_t dso__data_size(struct dso *dso, struct machine *machine)
static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size)
{
- if (data_file_size(dso, machine))
+ if (dso__data_file_size(dso, machine))
return -1;
/* Check the offset sanity. */
@@ -1195,10 +1198,10 @@ struct dso *dso__new(const char *name)
strcpy(dso->name, name);
dso__set_long_name(dso, dso->name, false);
dso__set_short_name(dso, dso->name, false);
- dso->symbols = dso->symbol_names = RB_ROOT;
+ dso->symbols = dso->symbol_names = RB_ROOT_CACHED;
dso->data.cache = RB_ROOT;
- dso->inlined_nodes = RB_ROOT;
- dso->srclines = RB_ROOT;
+ dso->inlined_nodes = RB_ROOT_CACHED;
+ dso->srclines = RB_ROOT_CACHED;
dso->data.fd = -1;
dso->data.status = DSO_DATA_STATUS_UNKNOWN;
dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1467,7 +1470,7 @@ size_t dso__fprintf(struct dso *dso, FILE *fp)
ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
ret += dso__fprintf_buildid(dso, fp);
ret += fprintf(fp, ")\n");
- for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) {
struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
ret += symbol__fprintf(pos, fp);
}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c5380500bed4..bb417c54c25a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -7,13 +7,14 @@
#include <linux/rbtree.h>
#include <sys/types.h>
#include <stdbool.h>
+#include <stdio.h>
#include "rwsem.h"
-#include <linux/types.h>
#include <linux/bitops.h>
-#include "map.h"
-#include "namespaces.h"
#include "build-id.h"
+struct machine;
+struct map;
+
enum dso_binary_type {
DSO_BINARY_TYPE__KALLSYMS = 0,
DSO_BINARY_TYPE__GUEST_KALLSYMS,
@@ -140,10 +141,10 @@ struct dso {
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root *root; /* root of rbtree that rb_node is in */
- struct rb_root symbols;
- struct rb_root symbol_names;
- struct rb_root inlined_nodes;
- struct rb_root srclines;
+ struct rb_root_cached symbols;
+ struct rb_root_cached symbol_names;
+ struct rb_root_cached inlined_nodes;
+ struct rb_root_cached srclines;
struct {
u64 addr;
struct symbol *symbol;
@@ -235,7 +236,7 @@ bool dso__loaded(const struct dso *dso);
static inline bool dso__has_symbols(const struct dso *dso)
{
- return !RB_EMPTY_ROOT(&dso->symbols);
+ return !RB_EMPTY_ROOT(&dso->symbols.rb_root);
}
bool dso__sorted_by_name(const struct dso *dso);
@@ -322,6 +323,7 @@ int dso__data_get_fd(struct dso *dso, struct machine *machine);
void dso__data_put_fd(struct dso *dso);
void dso__data_close(struct dso *dso);
+int dso__data_file_size(struct dso *dso, struct machine *machine);
off_t dso__data_size(struct dso *dso, struct machine *machine);
ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
u64 offset, u8 *data, ssize_t size);
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
index 10988d3de7ce..2bd8585db93c 100644
--- a/tools/perf/util/dump-insn.c
+++ b/tools/perf/util/dump-insn.c
@@ -13,3 +13,11 @@ const char *dump_insn(struct perf_insn *x __maybe_unused,
*lenp = 0;
return "?";
}
+
+__weak
+int arch_is_branch(const unsigned char *buf __maybe_unused,
+ size_t len __maybe_unused,
+ int x86_64 __maybe_unused)
+{
+ return 0;
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
index 0e06280a8860..650125061530 100644
--- a/tools/perf/util/dump-insn.h
+++ b/tools/perf/util/dump-insn.h
@@ -20,4 +20,6 @@ struct perf_insn {
const char *dump_insn(struct perf_insn *x, u64 ip,
u8 *inbuf, int inlen, int *lenp);
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64);
+
#endif
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 59f38c7693f8..4c23779e271a 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -166,7 +166,7 @@ const char *perf_env__arch(struct perf_env *env)
struct utsname uts;
char *arch_name;
- if (!env) { /* Assume local operation */
+ if (!env || !env->arch) { /* Assume local operation */
if (uname(&uts) < 0)
return NULL;
arch_name = uts.machine;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1f3ccc368530..d01b8355f4ca 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -63,6 +63,7 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
+ u64 clockid_res_ns;
};
extern struct perf_env perf_env;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index bc646185f8d9..ba7be74fad6e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -21,9 +21,15 @@
#include "thread.h"
#include "thread_map.h"
#include "sane_ctype.h"
+#include "map.h"
+#include "symbol.h"
#include "symbol/kallsyms.h"
#include "asm/bug.h"
#include "stat.h"
+#include "session.h"
+#include "bpf-event.h"
+
+#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
static const char *perf_event__names[] = {
[0] = "TOTAL",
@@ -43,6 +49,8 @@ static const char *perf_event__names[] = {
[PERF_RECORD_SWITCH] = "SWITCH",
[PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
+ [PERF_RECORD_KSYMBOL] = "KSYMBOL",
+ [PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -72,6 +80,8 @@ static const char *perf_ns__names[] = {
[CGROUP_NS_INDEX] = "cgroup",
};
+unsigned int proc_map_timeout = DEFAULT_PROC_MAP_PARSE_TIMEOUT;
+
const char *perf_event__name(unsigned int id)
{
if (id >= ARRAY_SIZE(perf_event__names))
@@ -308,6 +318,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
event->fork.pid = tgid;
event->fork.tid = pid;
event->fork.header.type = PERF_RECORD_FORK;
+ event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC;
event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
@@ -322,8 +333,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data,
- unsigned int proc_map_timeout)
+ bool mmap_data)
{
char filename[PATH_MAX];
FILE *fp;
@@ -520,8 +530,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
perf_event__handler_t process,
struct perf_tool *tool,
struct machine *machine,
- bool mmap_data,
- unsigned int proc_map_timeout)
+ bool mmap_data)
{
char filename[PATH_MAX];
DIR *tasks;
@@ -547,8 +556,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
*/
if (pid == tgid &&
perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
- process, machine, mmap_data,
- proc_map_timeout))
+ process, machine, mmap_data))
return -1;
return 0;
@@ -597,7 +605,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
if (_pid == pid) {
/* process the parent's maps too */
rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
- process, machine, mmap_data, proc_map_timeout);
+ process, machine, mmap_data);
if (rc)
break;
}
@@ -611,8 +619,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
struct thread_map *threads,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data,
- unsigned int proc_map_timeout)
+ bool mmap_data)
{
union perf_event *comm_event, *mmap_event, *fork_event;
union perf_event *namespaces_event;
@@ -642,7 +649,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
fork_event, namespaces_event,
thread_map__pid(threads, thread), 0,
process, tool, machine,
- mmap_data, proc_map_timeout)) {
+ mmap_data)) {
err = -1;
break;
}
@@ -668,7 +675,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
fork_event, namespaces_event,
comm_event->comm.pid, 0,
process, tool, machine,
- mmap_data, proc_map_timeout)) {
+ mmap_data)) {
err = -1;
break;
}
@@ -689,7 +696,6 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool mmap_data,
- unsigned int proc_map_timeout,
struct dirent **dirent,
int start,
int num)
@@ -733,8 +739,7 @@ static int __perf_event__synthesize_threads(struct perf_tool *tool,
*/
__event__synthesize_thread(comm_event, mmap_event, fork_event,
namespaces_event, pid, 1, process,
- tool, machine, mmap_data,
- proc_map_timeout);
+ tool, machine, mmap_data);
}
err = 0;
@@ -754,7 +759,6 @@ struct synthesize_threads_arg {
perf_event__handler_t process;
struct machine *machine;
bool mmap_data;
- unsigned int proc_map_timeout;
struct dirent **dirent;
int num;
int start;
@@ -766,7 +770,7 @@ static void *synthesize_threads_worker(void *arg)
__perf_event__synthesize_threads(args->tool, args->process,
args->machine, args->mmap_data,
- args->proc_map_timeout, args->dirent,
+ args->dirent,
args->start, args->num);
return NULL;
}
@@ -775,7 +779,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine,
bool mmap_data,
- unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
struct synthesize_threads_arg *args = NULL;
@@ -805,7 +808,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
if (thread_nr <= 1) {
err = __perf_event__synthesize_threads(tool, process,
machine, mmap_data,
- proc_map_timeout,
dirent, base, n);
goto free_dirent;
}
@@ -827,7 +829,6 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
args[i].process = process;
args[i].machine = machine;
args[i].mmap_data = mmap_data;
- args[i].proc_map_timeout = proc_map_timeout;
args[i].dirent = dirent;
}
for (i = 0; i < m; i++) {
@@ -1334,6 +1335,22 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
return machine__process_switch_event(machine, event);
}
+int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_ksymbol(machine, event, sample);
+}
+
+int perf_event__process_bpf_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_bpf_event(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -1466,6 +1483,21 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
}
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
+ event->ksymbol_event.addr, event->ksymbol_event.len,
+ event->ksymbol_event.ksym_type,
+ event->ksymbol_event.flags, event->ksymbol_event.name);
+}
+
+size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " bpf event with type %u, flags %u, id %u\n",
+ event->bpf_event.type, event->bpf_event.flags,
+ event->bpf_event.id);
+}
+
size_t perf_event__fprintf(union perf_event *event, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -1501,6 +1533,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_LOST:
ret += perf_event__fprintf_lost(event, fp);
break;
+ case PERF_RECORD_KSYMBOL:
+ ret += perf_event__fprintf_ksymbol(event, fp);
+ break;
+ case PERF_RECORD_BPF_EVENT:
+ ret += perf_event__fprintf_bpf_event(event, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
@@ -1576,6 +1614,24 @@ struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
return al->map;
}
+/*
+ * For branch stacks or branch samples, the sample cpumode might not be correct
+ * because it applies only to the sample 'ip' and not necessarily to 'addr' or
+ * branch stack addresses. If possible, use a fallback to deal with those cases.
+ */
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ struct map *map = thread__find_map(thread, cpumode, addr, al);
+ struct machine *machine = thread->mg->machine;
+ u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
+
+ if (map || addr_cpumode == cpumode)
+ return map;
+
+ return thread__find_map(thread, addr_cpumode, addr, al);
+}
+
struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al)
{
@@ -1585,6 +1641,15 @@ struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
return al->sym;
}
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+ al->sym = NULL;
+ if (thread__find_map_fb(thread, cpumode, addr, al))
+ al->sym = map__find_symbol(al->map, al->addr);
+ return al->sym;
+}
+
/*
* Callers need to drop the reference to al->thread, obtained in
* machine__findnew_thread()
@@ -1678,7 +1743,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr)
void thread__resolve(struct thread *thread, struct addr_location *al,
struct perf_sample *sample)
{
- thread__find_map(thread, sample->cpumode, sample->addr, al);
+ thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
al->cpu = sample->cpu;
al->sym = NULL;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index bfa60bcafbde..36ae7e92dab1 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -5,6 +5,7 @@
#include <limits.h>
#include <stdio.h>
#include <linux/kernel.h>
+#include <linux/bpf.h>
#include "../perf.h"
#include "build-id.h"
@@ -84,6 +85,29 @@ struct throttle_event {
u64 stream_id;
};
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 256
+#endif
+
+struct ksymbol_event {
+ struct perf_event_header header;
+ u64 addr;
+ u32 len;
+ u16 ksym_type;
+ u16 flags;
+ char name[KSYM_NAME_LEN];
+};
+
+struct bpf_event {
+ struct perf_event_header header;
+ u16 type;
+ u16 flags;
+ u32 id;
+
+ /* for bpf_prog types */
+ u8 tag[BPF_TAG_SIZE]; // prog tag
+};
+
#define PERF_SAMPLE_MASK \
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
@@ -137,26 +161,7 @@ struct ip_callchain {
u64 ips[0];
};
-struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 cycles:16;
- u64 type:4;
- u64 reserved:40;
-};
-
-struct branch_entry {
- u64 from;
- u64 to;
- struct branch_flags flags;
-};
-
-struct branch_stack {
- u64 nr;
- struct branch_entry entries[0];
-};
+struct branch_stack;
enum {
PERF_IP_FLAG_BRANCH = 1ULL << 0,
@@ -527,8 +532,9 @@ struct auxtrace_error_event {
u32 cpu;
u32 pid;
u32 tid;
- u32 reserved__; /* For alignment */
+ u32 fmt;
u64 ip;
+ u64 time;
char msg[MAX_AUXTRACE_ERROR_MSG];
};
@@ -651,6 +657,8 @@ union perf_event {
struct stat_round_event stat_round;
struct time_conv_event time_conv;
struct feature_event feat;
+ struct ksymbol_event ksymbol_event;
+ struct bpf_event bpf_event;
};
void perf_event__print_totals(void);
@@ -669,8 +677,7 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool,
int perf_event__synthesize_thread_map(struct perf_tool *tool,
struct thread_map *threads,
perf_event__handler_t process,
- struct machine *machine, bool mmap_data,
- unsigned int proc_map_timeout);
+ struct machine *machine, bool mmap_data);
int perf_event__synthesize_thread_map2(struct perf_tool *tool,
struct thread_map *threads,
perf_event__handler_t process,
@@ -682,7 +689,6 @@ int perf_event__synthesize_cpu_map(struct perf_tool *tool,
int perf_event__synthesize_threads(struct perf_tool *tool,
perf_event__handler_t process,
struct machine *machine, bool mmap_data,
- unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize);
int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
perf_event__handler_t process,
@@ -750,6 +756,14 @@ int perf_event__process_exit(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_ksymbol(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_bpf_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_tool__process_synth_event(struct perf_tool *tool,
union perf_event *event,
struct machine *machine,
@@ -797,8 +811,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
pid_t pid, pid_t tgid,
perf_event__handler_t process,
struct machine *machine,
- bool mmap_data,
- unsigned int proc_map_timeout);
+ bool mmap_data);
int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
perf_event__handler_t process,
@@ -814,6 +827,8 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,
@@ -829,5 +844,6 @@ int perf_event_paranoid(void);
extern int sysctl_perf_event_max_stack;
extern int sysctl_perf_event_max_contexts_per_stack;
+extern unsigned int proc_map_timeout;
#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index be440df29615..08cedb643ea6 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -34,6 +34,10 @@
#include <linux/log2.h>
#include <linux/err.h>
+#ifdef LACKS_SIGQUEUE_PROTOTYPE
+int sigqueue(pid_t pid, int sig, const union sigval value);
+#endif
+
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
@@ -358,7 +362,7 @@ void perf_evlist__disable(struct perf_evlist *evlist)
struct perf_evsel *pos;
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd)
continue;
perf_evsel__disable(pos);
}
@@ -1018,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite)
+ bool auxtrace_overwrite, int nr_cblocks, int affinity)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1028,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp;
+ struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1060,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, 0, false);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
@@ -1176,7 +1180,7 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e
return err;
}
-int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
+int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter)
{
struct perf_evsel *evsel;
int err = 0;
@@ -1193,7 +1197,7 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
return err;
}
-int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
+int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
{
char *filter;
int ret = -1;
@@ -1214,15 +1218,15 @@ int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t
}
}
- ret = perf_evlist__set_filter(evlist, filter);
+ ret = perf_evlist__set_tp_filter(evlist, filter);
out_free:
free(filter);
return ret;
}
-int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
+int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid)
{
- return perf_evlist__set_filter_pids(evlist, 1, &pid);
+ return perf_evlist__set_tp_filter_pids(evlist, 1, &pid);
}
bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
@@ -1810,3 +1814,30 @@ void perf_evlist__force_leader(struct perf_evlist *evlist)
leader->forced_leader = true;
}
}
+
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list,
+ struct perf_evsel *evsel)
+{
+ struct perf_evsel *c2, *leader;
+ bool is_open = true;
+
+ leader = evsel->leader;
+ pr_debug("Weak group for %s/%d failed\n",
+ leader->name, leader->nr_members);
+
+ /*
+ * for_each_group_member doesn't work here because it doesn't
+ * include the first entry.
+ */
+ evlist__for_each_entry(evsel_list, c2) {
+ if (c2 == evsel)
+ is_open = false;
+ if (c2->leader == leader) {
+ if (is_open)
+ perf_evsel__close(c2);
+ c2->leader = c2;
+ c2->nr_members = 0;
+ }
+ }
+ return leader;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index dc66436add98..744906dd4887 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -49,6 +49,9 @@ struct perf_evlist {
struct perf_evsel *selected;
struct events_stats stats;
struct perf_env *env;
+ void (*trace_event_sample_raw)(struct perf_evlist *evlist,
+ union perf_event *event,
+ struct perf_sample *sample);
u64 first_sample_time;
u64 last_sample_time;
};
@@ -98,9 +101,9 @@ void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
#define perf_evlist__reset_sample_bit(evlist, bit) \
__perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
-int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
-int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
-int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
+int perf_evlist__set_tp_filter(struct perf_evlist *evlist, const char *filter);
+int perf_evlist__set_tp_filter_pid(struct perf_evlist *evlist, pid_t pid);
+int perf_evlist__set_tp_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
struct perf_evsel *
perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
@@ -162,7 +165,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite);
+ bool auxtrace_overwrite, int nr_cblocks, int affinity);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
@@ -312,4 +315,6 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
void perf_evlist__force_leader(struct perf_evlist *evlist);
+struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist,
+ struct perf_evsel *evsel);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 29d7b97f66fb..dfe2958e6287 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel,
evsel->leader = evsel;
evsel->unit = "";
evsel->scale = 1.0;
+ evsel->max_events = ULONG_MAX;
evsel->evlist = NULL;
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->node);
@@ -793,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel,
case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
max_stack = term->val.max_stack;
break;
+ case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+ evsel->max_events = term->val.max_events;
+ break;
case PERF_EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should has already been set by
@@ -952,7 +956,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->sample_freq = 0;
attr->sample_period = 0;
attr->write_backward = 0;
- attr->sample_id_all = 0;
+
+ /*
+ * We don't get sample for slave events, we make them
+ * when delivering group leader sample. Set the slave
+ * event to follow the master sample_type to ease up
+ * report.
+ */
+ attr->sample_type = leader->attr.sample_type;
}
if (opts->no_samples)
@@ -1032,6 +1043,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->mmap = track;
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
+ attr->ksymbol = track && !perf_missing_features.ksymbol;
+ attr->bpf_event = track && opts->bpf_event &&
+ !perf_missing_features.bpf_event;
if (opts->record_namespaces)
attr->namespaces = track;
@@ -1089,7 +1103,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
attr->exclude_user = 1;
}
- if (evsel->own_cpus)
+ if (evsel->own_cpus || evsel->unit)
evsel->attr.read_format |= PERF_FORMAT_ID;
/*
@@ -1203,16 +1217,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
int perf_evsel__enable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel,
- PERF_EVENT_IOC_ENABLE,
- 0);
+ int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+
+ if (!err)
+ evsel->disabled = false;
+
+ return err;
}
int perf_evsel__disable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel,
- PERF_EVENT_IOC_DISABLE,
- 0);
+ int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+ /*
+ * We mark it disabled here so that tools that disable an event can
+ * ignore events after they disable it. I.e. the ring buffer may have
+ * already a few more events queued up before the kernel got the stop
+ * request.
+ */
+ if (!err)
+ evsel->disabled = true;
+
+ return err;
}
int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
@@ -1638,6 +1663,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(context_switch, p_unsigned);
PRINT_ATTRf(write_backward, p_unsigned);
PRINT_ATTRf(namespaces, p_unsigned);
+ PRINT_ATTRf(ksymbol, p_unsigned);
+ PRINT_ATTRf(bpf_event, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
@@ -1797,6 +1824,10 @@ fallback_missing_features:
PERF_SAMPLE_BRANCH_NO_CYCLES);
if (perf_missing_features.group_read && evsel->attr.inherit)
evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
+ if (perf_missing_features.ksymbol)
+ evsel->attr.ksymbol = 0;
+ if (perf_missing_features.bpf_event)
+ evsel->attr.bpf_event = 0;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->attr.sample_id_all = 0;
@@ -1916,7 +1947,15 @@ try_fallback:
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
+ if (!perf_missing_features.bpf_event && evsel->attr.bpf_event) {
+ perf_missing_features.bpf_event = true;
+ pr_debug2("switching off bpf_event\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) {
+ perf_missing_features.ksymbol = true;
+ pr_debug2("switching off ksymbol\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
perf_missing_features.write_backward = true;
pr_debug2("switching off write_backward\n");
goto out_close;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4107c39f4a54..cc578e02e08f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -8,7 +8,7 @@
#include <linux/perf_event.h>
#include <linux/types.h>
#include "xyarray.h"
-#include "symbol.h"
+#include "symbol_conf.h"
#include "cpumap.h"
#include "counts.h"
@@ -46,6 +46,7 @@ enum term_type {
PERF_EVSEL__CONFIG_TERM_STACK_USER,
PERF_EVSEL__CONFIG_TERM_INHERIT,
PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+ PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
PERF_EVSEL__CONFIG_TERM_OVERWRITE,
PERF_EVSEL__CONFIG_TERM_DRV_CFG,
PERF_EVSEL__CONFIG_TERM_BRANCH,
@@ -65,6 +66,7 @@ struct perf_evsel_config_term {
bool inherit;
bool overwrite;
char *branch;
+ unsigned long max_events;
} val;
bool weak;
};
@@ -99,10 +101,12 @@ struct perf_evsel {
struct perf_counts *prev_raw_counts;
int idx;
u32 ids;
+ unsigned long max_events;
+ unsigned long nr_events_printed;
char *name;
double scale;
const char *unit;
- struct tep_event_format *tp_format;
+ struct tep_event *tp_format;
off_t id_offset;
struct perf_stat_evsel *stats;
void *priv;
@@ -119,6 +123,7 @@ struct perf_evsel {
bool snapshot;
bool supported;
bool needs_swap;
+ bool disabled;
bool no_aux_samples;
bool immediate;
bool system_wide;
@@ -163,6 +168,8 @@ struct perf_missing_features {
bool lbr_flags;
bool write_backward;
bool group_read;
+ bool ksymbol;
+ bool bpf_event;
};
extern struct perf_missing_features perf_missing_features;
@@ -211,7 +218,7 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *
struct perf_evsel *perf_evsel__new_cycles(bool precise);
-struct tep_event_format *event_format__new(const char *sys, const char *name);
+struct tep_event *event_format__new(const char *sys, const char *name);
void perf_evsel__init(struct perf_evsel *evsel,
struct perf_event_attr *attr, int idx);
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 0d0a4c6f368b..95ea147f9e18 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -173,6 +173,7 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
if (!print_oneline)
printed += fprintf(fp, "\n");
+ /* Add srccode here too? */
if (symbol_conf.bt_stop_list &&
node->sym &&
strlist__has_entry(symbol_conf.bt_stop_list,
diff --git a/tools/perf/util/find-vdso-map.c b/tools/perf/util/find-map.c
index d7823e3508fc..7b2300588ece 100644
--- a/tools/perf/util/find-vdso-map.c
+++ b/tools/perf/util/find-map.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-static int find_vdso_map(void **start, void **end)
+static int find_map(void **start, void **end, const char *name)
{
FILE *maps;
char line[128];
@@ -7,7 +7,7 @@ static int find_vdso_map(void **start, void **end)
maps = fopen("/proc/self/maps", "r");
if (!maps) {
- fprintf(stderr, "vdso: cannot open maps\n");
+ fprintf(stderr, "cannot open maps\n");
return -1;
}
@@ -21,8 +21,7 @@ static int find_vdso_map(void **start, void **end)
if (m < 0)
continue;
- if (!strncmp(&line[m], VDSO__MAP_NAME,
- sizeof(VDSO__MAP_NAME) - 1))
+ if (!strncmp(&line[m], name, strlen(name)))
found = 1;
}
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index de322d51c7fe..b72440bf9a79 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#elif defined(__powerpc__)
#define GEN_ELF_ARCH EM_PPC
#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__sparc__) && defined(__arch64__)
+#define GEN_ELF_ARCH EM_SPARCV9
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__sparc__)
+#define GEN_ELF_ARCH EM_SPARC
+#define GEN_ELF_CLASS ELFCLASS32
#else
#error "unsupported architecture"
#endif
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 000000000000..267aa609a582
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "util.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+/* Returns a strdup()ed copy of the cwd (caller frees), or NULL on failure. */
+char *get_current_dir_name(void)
+{
+ char pwd[PATH_MAX];
+
+ return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 1ec1d9bc2d63..01b324c275b9 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -39,6 +39,7 @@
#include "tool.h"
#include "time-utils.h"
#include "units.h"
+#include "cputopo.h"
#include "sane_ctype.h"
@@ -526,17 +527,11 @@ static int write_event_desc(struct feat_fd *ff,
static int write_cmdline(struct feat_fd *ff,
struct perf_evlist *evlist __maybe_unused)
{
- char buf[MAXPATHLEN];
- u32 n;
- int i, ret;
+ char pbuf[MAXPATHLEN], *buf;
+ int i, ret, n;
/* actual path to perf binary */
- ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
- if (ret <= 0)
- return -1;
-
- /* readlink() does not add null termination */
- buf[ret] = '\0';
+ buf = perf_exe(pbuf, MAXPATHLEN);
/* account for binary path */
n = perf_env.nr_cmdline + 1;
@@ -557,160 +552,15 @@ static int write_cmdline(struct feat_fd *ff,
return 0;
}
-#define CORE_SIB_FMT \
- "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list"
-#define THRD_SIB_FMT \
- "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
-
-struct cpu_topo {
- u32 cpu_nr;
- u32 core_sib;
- u32 thread_sib;
- char **core_siblings;
- char **thread_siblings;
-};
-
-static int build_cpu_topo(struct cpu_topo *tp, int cpu)
-{
- FILE *fp;
- char filename[MAXPATHLEN];
- char *buf = NULL, *p;
- size_t len = 0;
- ssize_t sret;
- u32 i = 0;
- int ret = -1;
-
- sprintf(filename, CORE_SIB_FMT, cpu);
- fp = fopen(filename, "r");
- if (!fp)
- goto try_threads;
-
- sret = getline(&buf, &len, fp);
- fclose(fp);
- if (sret <= 0)
- goto try_threads;
-
- p = strchr(buf, '\n');
- if (p)
- *p = '\0';
-
- for (i = 0; i < tp->core_sib; i++) {
- if (!strcmp(buf, tp->core_siblings[i]))
- break;
- }
- if (i == tp->core_sib) {
- tp->core_siblings[i] = buf;
- tp->core_sib++;
- buf = NULL;
- len = 0;
- }
- ret = 0;
-
-try_threads:
- sprintf(filename, THRD_SIB_FMT, cpu);
- fp = fopen(filename, "r");
- if (!fp)
- goto done;
-
- if (getline(&buf, &len, fp) <= 0)
- goto done;
-
- p = strchr(buf, '\n');
- if (p)
- *p = '\0';
-
- for (i = 0; i < tp->thread_sib; i++) {
- if (!strcmp(buf, tp->thread_siblings[i]))
- break;
- }
- if (i == tp->thread_sib) {
- tp->thread_siblings[i] = buf;
- tp->thread_sib++;
- buf = NULL;
- }
- ret = 0;
-done:
- if(fp)
- fclose(fp);
- free(buf);
- return ret;
-}
-
-static void free_cpu_topo(struct cpu_topo *tp)
-{
- u32 i;
-
- if (!tp)
- return;
-
- for (i = 0 ; i < tp->core_sib; i++)
- zfree(&tp->core_siblings[i]);
-
- for (i = 0 ; i < tp->thread_sib; i++)
- zfree(&tp->thread_siblings[i]);
-
- free(tp);
-}
-
-static struct cpu_topo *build_cpu_topology(void)
-{
- struct cpu_topo *tp = NULL;
- void *addr;
- u32 nr, i;
- size_t sz;
- long ncpus;
- int ret = -1;
- struct cpu_map *map;
-
- ncpus = cpu__max_present_cpu();
-
- /* build online CPU map */
- map = cpu_map__new(NULL);
- if (map == NULL) {
- pr_debug("failed to get system cpumap\n");
- return NULL;
- }
-
- nr = (u32)(ncpus & UINT_MAX);
-
- sz = nr * sizeof(char *);
- addr = calloc(1, sizeof(*tp) + 2 * sz);
- if (!addr)
- goto out_free;
-
- tp = addr;
- tp->cpu_nr = nr;
- addr += sizeof(*tp);
- tp->core_siblings = addr;
- addr += sz;
- tp->thread_siblings = addr;
-
- for (i = 0; i < nr; i++) {
- if (!cpu_map__has(map, i))
- continue;
-
- ret = build_cpu_topo(tp, i);
- if (ret < 0)
- break;
- }
-
-out_free:
- cpu_map__put(map);
- if (ret) {
- free_cpu_topo(tp);
- tp = NULL;
- }
- return tp;
-}
static int write_cpu_topology(struct feat_fd *ff,
struct perf_evlist *evlist __maybe_unused)
{
- struct cpu_topo *tp;
+ struct cpu_topology *tp;
u32 i;
int ret, j;
- tp = build_cpu_topology();
+ tp = cpu_topology__new();
if (!tp)
return -1;
@@ -748,7 +598,7 @@ static int write_cpu_topology(struct feat_fd *ff,
return ret;
}
done:
- free_cpu_topo(tp);
+ cpu_topology__delete(tp);
return ret;
}
@@ -783,112 +633,45 @@ static int write_total_mem(struct feat_fd *ff,
return ret;
}
-static int write_topo_node(struct feat_fd *ff, int node)
-{
- char str[MAXPATHLEN];
- char field[32];
- char *buf = NULL, *p;
- size_t len = 0;
- FILE *fp;
- u64 mem_total, mem_free, mem;
- int ret = -1;
-
- sprintf(str, "/sys/devices/system/node/node%d/meminfo", node);
- fp = fopen(str, "r");
- if (!fp)
- return -1;
-
- while (getline(&buf, &len, fp) > 0) {
- /* skip over invalid lines */
- if (!strchr(buf, ':'))
- continue;
- if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
- goto done;
- if (!strcmp(field, "MemTotal:"))
- mem_total = mem;
- if (!strcmp(field, "MemFree:"))
- mem_free = mem;
- }
-
- fclose(fp);
- fp = NULL;
-
- ret = do_write(ff, &mem_total, sizeof(u64));
- if (ret)
- goto done;
-
- ret = do_write(ff, &mem_free, sizeof(u64));
- if (ret)
- goto done;
-
- ret = -1;
- sprintf(str, "/sys/devices/system/node/node%d/cpulist", node);
-
- fp = fopen(str, "r");
- if (!fp)
- goto done;
-
- if (getline(&buf, &len, fp) <= 0)
- goto done;
-
- p = strchr(buf, '\n');
- if (p)
- *p = '\0';
-
- ret = do_write_string(ff, buf);
-done:
- free(buf);
- if (fp)
- fclose(fp);
- return ret;
-}
-
static int write_numa_topology(struct feat_fd *ff,
struct perf_evlist *evlist __maybe_unused)
{
- char *buf = NULL;
- size_t len = 0;
- FILE *fp;
- struct cpu_map *node_map = NULL;
- char *c;
- u32 nr, i, j;
+ struct numa_topology *tp;
int ret = -1;
+ u32 i;
- fp = fopen("/sys/devices/system/node/online", "r");
- if (!fp)
- return -1;
-
- if (getline(&buf, &len, fp) <= 0)
- goto done;
+ tp = numa_topology__new();
+ if (!tp)
+ return -ENOMEM;
- c = strchr(buf, '\n');
- if (c)
- *c = '\0';
+ ret = do_write(ff, &tp->nr, sizeof(u32));
+ if (ret < 0)
+ goto err;
- node_map = cpu_map__new(buf);
- if (!node_map)
- goto done;
+ for (i = 0; i < tp->nr; i++) {
+ struct numa_topology_node *n = &tp->nodes[i];
- nr = (u32)node_map->nr;
+ ret = do_write(ff, &n->node, sizeof(u32));
+ if (ret < 0)
+ goto err;
- ret = do_write(ff, &nr, sizeof(nr));
- if (ret < 0)
- goto done;
+ ret = do_write(ff, &n->mem_total, sizeof(u64));
+ if (ret)
+ goto err;
- for (i = 0; i < nr; i++) {
- j = (u32)node_map->map[i];
- ret = do_write(ff, &j, sizeof(j));
- if (ret < 0)
- break;
+ ret = do_write(ff, &n->mem_free, sizeof(u64));
+ if (ret)
+ goto err;
- ret = write_topo_node(ff, i);
+ ret = do_write_string(ff, n->cpus);
if (ret < 0)
- break;
+ goto err;
}
-done:
- free(buf);
- fclose(fp);
- cpu_map__put(node_map);
+
+ ret = 0;
+
+err:
+ numa_topology__delete(tp);
return ret;
}
@@ -988,6 +771,45 @@ static int write_group_desc(struct feat_fd *ff,
}
/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be used to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ return NULL;
+}
+
+/* Return zero when the mapfile.csv cpuid, compiled as an extended regex,
+ * matches the entire cpuid string generated on this platform.
+ * Otherwise (including an invalid regex) return non-zero.
+ */
+int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+ regex_t re;
+ regmatch_t pmatch[1];
+ int match;
+
+ if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
+ /* Warn that mapcpuid could not be compiled into a regex. */
+ pr_info("Invalid regular expression %s\n", mapcpuid);
+ return 1;
+ }
+
+ match = !regexec(&re, cpuid, 1, pmatch, 0);
+ regfree(&re);
+ if (match) {
+ size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+ /* Verify the entire string matched. */
+ if (match_len == strlen(cpuid))
+ return 0;
+ }
+ return 1;
+}
+
+/*
* default get_cpuid(): nothing gets recorded
* actual implementation must be in arch/$(SRCARCH)/util/header.c
*/
@@ -1003,11 +825,9 @@ static int write_cpuid(struct feat_fd *ff,
int ret;
ret = get_cpuid(buffer, sizeof(buffer));
- if (!ret)
- goto write_it;
+ if (ret)
+ return -1;
- return -1;
-write_it:
return do_write_string(ff, buffer);
}
@@ -1034,6 +854,13 @@ static int write_auxtrace(struct feat_fd *ff,
return err;
}
+static int write_clockid(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return do_write(ff, &ff->ph->env.clockid_res_ns,
+ sizeof(ff->ph->env.clockid_res_ns));
+}
+
static int cpu_cache_level__sort(const void *a, const void *b)
{
struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
@@ -1508,6 +1335,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# Core ID and Socket ID information is not available\n");
}
+static void print_clockid(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
+ ff->ph->env.clockid_res_ns * 1000);
+}
+
static void free_event_desc(struct perf_evsel *events)
{
struct perf_evsel *evsel;
@@ -2531,6 +2364,15 @@ out:
return ret;
}
+static int process_clockid(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+ return -1;
+
+ return 0;
+}
+
struct feature_ops {
int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2590,6 +2432,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPN(CACHE, cache, true),
FEAT_OPR(SAMPLE_TIME, sample_time, false),
FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
+ FEAT_OPR(CLOCKID, clockid, false)
};
struct header_print_data {
@@ -2636,6 +2479,7 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
struct perf_header *header = &session->header;
int fd = perf_data__fd(session->data);
struct stat st;
+ time_t stctime;
int ret, bit;
hd.fp = fp;
@@ -2645,7 +2489,8 @@ int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
if (ret == -1)
return -1;
- fprintf(fp, "# captured on : %s", ctime(&st.st_ctime));
+ stctime = st.st_ctime;
+ fprintf(fp, "# captured on : %s", ctime(&stctime));
fprintf(fp, "# header version : %u\n", header->version);
fprintf(fp, "# data offset : %" PRIu64 "\n", header->data_offset);
@@ -2736,7 +2581,7 @@ static int perf_header__adds_write(struct perf_header *header,
lseek(fd, sec_start, SEEK_SET);
/*
* may write more than needed due to dropped feature, but
- * this is okay, reader will skip the mising entries
+ * this is okay, reader will skip the missing entries
*/
err = do_write(&ff, feat_sec, sec_size);
if (err < 0)
@@ -3206,7 +3051,7 @@ static int read_attr(int fd, struct perf_header *ph,
static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
struct tep_handle *pevent)
{
- struct tep_event_format *event;
+ struct tep_event *event;
char bf[128];
/* already prepared */
@@ -3521,7 +3366,7 @@ perf_event__synthesize_event_update_unit(struct perf_tool *tool,
if (ev == NULL)
return -ENOMEM;
- strncpy(ev->data, evsel->unit, size);
+ strlcpy(ev->data, evsel->unit, size + 1);
err = process(tool, (union perf_event *)ev, NULL, NULL);
free(ev);
return err;
@@ -3560,7 +3405,7 @@ perf_event__synthesize_event_update_name(struct perf_tool *tool,
if (ev == NULL)
return -ENOMEM;
- strncpy(ev->data, evsel->name, len);
+ strlcpy(ev->data, evsel->name, len + 1);
err = process(tool, (union perf_event*) ev, NULL, NULL);
free(ev);
return err;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e17903caa71d..0d553ddca0a3 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -38,6 +38,7 @@ enum {
HEADER_CACHE,
HEADER_SAMPLE_TIME,
HEADER_MEM_TOPOLOGY,
+ HEADER_CLOCKID,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 828cb9794c76..669f961316f0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include "callchain.h"
#include "util.h"
#include "build-id.h"
#include "hist.h"
@@ -11,6 +12,7 @@
#include "evsel.h"
#include "annotate.h"
#include "srcline.h"
+#include "symbol.h"
#include "thread.h"
#include "ui/progress.h"
#include <errno.h>
@@ -209,7 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
void hists__output_recalc_col_len(struct hists *hists, int max_rows)
{
- struct rb_node *next = rb_first(&hists->entries);
+ struct rb_node *next = rb_first_cached(&hists->entries);
struct hist_entry *n;
int row = 0;
@@ -296,7 +298,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
if (!he->leaf) {
struct hist_entry *child;
- struct rb_node *node = rb_first(&he->hroot_out);
+ struct rb_node *node = rb_first_cached(&he->hroot_out);
while (node) {
child = rb_entry(node, struct hist_entry, rb_node);
node = rb_next(node);
@@ -311,8 +313,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
{
- struct rb_root *root_in;
- struct rb_root *root_out;
+ struct rb_root_cached *root_in;
+ struct rb_root_cached *root_out;
if (he->parent_he) {
root_in = &he->parent_he->hroot_in;
@@ -325,8 +327,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
root_out = &hists->entries;
}
- rb_erase(&he->rb_node_in, root_in);
- rb_erase(&he->rb_node, root_out);
+ rb_erase_cached(&he->rb_node_in, root_in);
+ rb_erase_cached(&he->rb_node, root_out);
--hists->nr_entries;
if (!he->filtered)
@@ -337,7 +339,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
{
- struct rb_node *next = rb_first(&hists->entries);
+ struct rb_node *next = rb_first_cached(&hists->entries);
struct hist_entry *n;
while (next) {
@@ -353,7 +355,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
void hists__delete_entries(struct hists *hists)
{
- struct rb_node *next = rb_first(&hists->entries);
+ struct rb_node *next = rb_first_cached(&hists->entries);
struct hist_entry *n;
while (next) {
@@ -435,8 +437,8 @@ static int hist_entry__init(struct hist_entry *he,
}
INIT_LIST_HEAD(&he->pairs.node);
thread__get(he->thread);
- he->hroot_in = RB_ROOT;
- he->hroot_out = RB_ROOT;
+ he->hroot_in = RB_ROOT_CACHED;
+ he->hroot_out = RB_ROOT_CACHED;
if (!symbol_conf.report_hierarchy)
he->leaf = true;
@@ -513,8 +515,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
int64_t cmp;
u64 period = entry->stat.period;
u64 weight = entry->stat.weight;
+ bool leftmost = true;
- p = &hists->entries_in->rb_node;
+ p = &hists->entries_in->rb_root.rb_node;
while (*p != NULL) {
parent = *p;
@@ -557,8 +560,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
if (cmp < 0)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
he = hist_entry__new(entry, sample_self);
@@ -570,7 +575,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
hists->nr_entries++;
rb_link_node(&he->rb_node_in, parent, p);
- rb_insert_color(&he->rb_node_in, hists->entries_in);
+ rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost);
out:
if (sample_self)
he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
@@ -1160,7 +1165,7 @@ void hist_entry__delete(struct hist_entry *he)
/*
* If this is not the last column, then we need to pad it according to the
- * pre-calculated max lenght for this column, otherwise don't bother adding
+ * pre-calculated max length for this column, otherwise don't bother adding
* spaces because that would break viewing this with, for instance, 'less',
* that would show tons of trailing spaces when a long C++ demangled method
* names is sampled.
@@ -1279,16 +1284,17 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
}
static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
- struct rb_root *root,
+ struct rb_root_cached *root,
struct hist_entry *he,
struct hist_entry *parent_he,
struct perf_hpp_list *hpp_list)
{
- struct rb_node **p = &root->rb_node;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter, *new;
struct perf_hpp_fmt *fmt;
int64_t cmp;
+ bool leftmost = true;
while (*p != NULL) {
parent = *p;
@@ -1308,8 +1314,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
if (cmp < 0)
p = &parent->rb_left;
- else
+ else {
p = &parent->rb_right;
+ leftmost = false;
+ }
}
new = hist_entry__new(he, true);
@@ -1343,12 +1351,12 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
}
rb_link_node(&new->rb_node_in, parent, p);
- rb_insert_color(&new->rb_node_in, root);
+ rb_insert_color_cached(&new->rb_node_in, root, leftmost);
return new;
}
static int hists__hierarchy_insert_entry(struct hists *hists,
- struct rb_root *root,
+ struct rb_root_cached *root,
struct hist_entry *he)
{
struct perf_hpp_list_node *node;
@@ -1395,13 +1403,14 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
}
static int hists__collapse_insert_entry(struct hists *hists,
- struct rb_root *root,
+ struct rb_root_cached *root,
struct hist_entry *he)
{
- struct rb_node **p = &root->rb_node;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;
int64_t cmp;
+ bool leftmost = true;
if (symbol_conf.report_hierarchy)
return hists__hierarchy_insert_entry(hists, root, he);
@@ -1432,19 +1441,21 @@ static int hists__collapse_insert_entry(struct hists *hists,
if (cmp < 0)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
hists->nr_entries++;
rb_link_node(&he->rb_node_in, parent, p);
- rb_insert_color(&he->rb_node_in, root);
+ rb_insert_color_cached(&he->rb_node_in, root, leftmost);
return 1;
}
-struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
+struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists)
{
- struct rb_root *root;
+ struct rb_root_cached *root;
pthread_mutex_lock(&hists->lock);
@@ -1467,7 +1478,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
{
- struct rb_root *root;
+ struct rb_root_cached *root;
struct rb_node *next;
struct hist_entry *n;
int ret;
@@ -1479,7 +1490,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
root = hists__get_rotate_entries_in(hists);
- next = rb_first(root);
+ next = rb_first_cached(root);
while (next) {
if (session_done())
@@ -1487,7 +1498,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
n = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&n->rb_node_in);
- rb_erase(&n->rb_node_in, root);
+ rb_erase_cached(&n->rb_node_in, root);
ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
if (ret < 0)
return -1;
@@ -1558,7 +1569,7 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
struct rb_node *node;
struct hist_entry *he;
- node = rb_first(&hists->entries);
+ node = rb_first_cached(&hists->entries);
hists->stats.total_period = 0;
hists->stats.total_non_filtered_period = 0;
@@ -1578,13 +1589,14 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
}
}
-static void hierarchy_insert_output_entry(struct rb_root *root,
+static void hierarchy_insert_output_entry(struct rb_root_cached *root,
struct hist_entry *he)
{
- struct rb_node **p = &root->rb_node;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;
struct perf_hpp_fmt *fmt;
+ bool leftmost = true;
while (*p != NULL) {
parent = *p;
@@ -1592,12 +1604,14 @@ static void hierarchy_insert_output_entry(struct rb_root *root,
if (hist_entry__sort(he, iter) > 0)
p = &parent->rb_left;
- else
+ else {
p = &parent->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&he->rb_node, parent, p);
- rb_insert_color(&he->rb_node, root);
+ rb_insert_color_cached(&he->rb_node, root, leftmost);
/* update column width of dynamic entry */
perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
@@ -1608,16 +1622,16 @@ static void hierarchy_insert_output_entry(struct rb_root *root,
static void hists__hierarchy_output_resort(struct hists *hists,
struct ui_progress *prog,
- struct rb_root *root_in,
- struct rb_root *root_out,
+ struct rb_root_cached *root_in,
+ struct rb_root_cached *root_out,
u64 min_callchain_hits,
bool use_callchain)
{
struct rb_node *node;
struct hist_entry *he;
- *root_out = RB_ROOT;
- node = rb_first(root_in);
+ *root_out = RB_ROOT_CACHED;
+ node = rb_first_cached(root_in);
while (node) {
he = rb_entry(node, struct hist_entry, rb_node_in);
@@ -1660,15 +1674,16 @@ static void hists__hierarchy_output_resort(struct hists *hists,
}
}
-static void __hists__insert_output_entry(struct rb_root *entries,
+static void __hists__insert_output_entry(struct rb_root_cached *entries,
struct hist_entry *he,
u64 min_callchain_hits,
bool use_callchain)
{
- struct rb_node **p = &entries->rb_node;
+ struct rb_node **p = &entries->rb_root.rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;
struct perf_hpp_fmt *fmt;
+ bool leftmost = true;
if (use_callchain) {
if (callchain_param.mode == CHAIN_GRAPH_REL) {
@@ -1689,12 +1704,14 @@ static void __hists__insert_output_entry(struct rb_root *entries,
if (hist_entry__sort(he, iter) > 0)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&he->rb_node, parent, p);
- rb_insert_color(&he->rb_node, entries);
+ rb_insert_color_cached(&he->rb_node, entries, leftmost);
perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
if (perf_hpp__is_dynamic_entry(fmt) &&
@@ -1704,9 +1721,10 @@ static void __hists__insert_output_entry(struct rb_root *entries,
}
static void output_resort(struct hists *hists, struct ui_progress *prog,
- bool use_callchain, hists__resort_cb_t cb)
+ bool use_callchain, hists__resort_cb_t cb,
+ void *cb_arg)
{
- struct rb_root *root;
+ struct rb_root_cached *root;
struct rb_node *next;
struct hist_entry *n;
u64 callchain_total;
@@ -1736,14 +1754,14 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
else
root = hists->entries_in;
- next = rb_first(root);
- hists->entries = RB_ROOT;
+ next = rb_first_cached(root);
+ hists->entries = RB_ROOT_CACHED;
while (next) {
n = rb_entry(next, struct hist_entry, rb_node_in);
next = rb_next(&n->rb_node_in);
- if (cb && cb(n))
+ if (cb && cb(n, cb_arg))
continue;
__hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
@@ -1757,7 +1775,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
}
}
-void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg)
{
bool use_callchain;
@@ -1768,18 +1787,23 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro
use_callchain |= symbol_conf.show_branchflag_count;
- output_resort(evsel__hists(evsel), prog, use_callchain, NULL);
+ output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
+}
+
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+ perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); /* no callback */
+}
void hists__output_resort(struct hists *hists, struct ui_progress *prog)
{
- output_resort(hists, prog, symbol_conf.use_callchain, NULL);
+ output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL);
}
void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
hists__resort_cb_t cb)
{
- output_resort(hists, prog, symbol_conf.use_callchain, cb);
+ output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL);
}
static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
@@ -1798,7 +1822,7 @@ struct rb_node *rb_hierarchy_last(struct rb_node *node)
struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
while (can_goto_child(he, HMD_NORMAL)) {
- node = rb_last(&he->hroot_out);
+ node = rb_last(&he->hroot_out.rb_root);
he = rb_entry(node, struct hist_entry, rb_node);
}
return node;
@@ -1809,7 +1833,7 @@ struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_di
struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
if (can_goto_child(he, hmd))
- node = rb_first(&he->hroot_out);
+ node = rb_first_cached(&he->hroot_out);
else
node = rb_next(node);
@@ -1847,7 +1871,7 @@ bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
if (he->leaf)
return false;
- node = rb_first(&he->hroot_out);
+ node = rb_first_cached(&he->hroot_out);
child = rb_entry(node, struct hist_entry, rb_node);
while (node && child->filtered) {
@@ -1965,7 +1989,7 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil
hists__reset_filter_stats(hists);
hists__reset_col_len(hists);
- for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
if (filter(hists, h))
@@ -1975,13 +1999,15 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil
}
}
-static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
+static void resort_filtered_entry(struct rb_root_cached *root,
+ struct hist_entry *he)
{
- struct rb_node **p = &root->rb_node;
+ struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *parent = NULL;
struct hist_entry *iter;
- struct rb_root new_root = RB_ROOT;
+ struct rb_root_cached new_root = RB_ROOT_CACHED;
struct rb_node *nd;
+ bool leftmost = true;
while (*p != NULL) {
parent = *p;
@@ -1989,22 +2015,24 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
if (hist_entry__sort(he, iter) > 0)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&he->rb_node, parent, p);
- rb_insert_color(&he->rb_node, root);
+ rb_insert_color_cached(&he->rb_node, root, leftmost);
if (he->leaf || he->filtered)
return;
- nd = rb_first(&he->hroot_out);
+ nd = rb_first_cached(&he->hroot_out);
while (nd) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
nd = rb_next(nd);
- rb_erase(&h->rb_node, &he->hroot_out);
+ rb_erase_cached(&h->rb_node, &he->hroot_out);
resort_filtered_entry(&new_root, h);
}
@@ -2015,14 +2043,14 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
{
struct rb_node *nd;
- struct rb_root new_root = RB_ROOT;
+ struct rb_root_cached new_root = RB_ROOT_CACHED;
hists->stats.nr_non_filtered_samples = 0;
hists__reset_filter_stats(hists);
hists__reset_col_len(hists);
- nd = rb_first(&hists->entries);
+ nd = rb_first_cached(&hists->entries);
while (nd) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
int ret;
@@ -2066,12 +2094,12 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a
* resort output after applying a new filter since filter in a lower
* hierarchy can change periods in a upper hierarchy.
*/
- nd = rb_first(&hists->entries);
+ nd = rb_first_cached(&hists->entries);
while (nd) {
struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
nd = rb_next(nd);
- rb_erase(&h->rb_node, &hists->entries);
+ rb_erase_cached(&h->rb_node, &hists->entries);
resort_filtered_entry(&new_root, h);
}
@@ -2140,18 +2168,19 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered)
static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
struct hist_entry *pair)
{
- struct rb_root *root;
+ struct rb_root_cached *root;
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
int64_t cmp;
+ bool leftmost = true;
if (hists__has(hists, need_collapse))
root = &hists->entries_collapsed;
else
root = hists->entries_in;
- p = &root->rb_node;
+ p = &root->rb_root.rb_node;
while (*p != NULL) {
parent = *p;
@@ -2164,8 +2193,10 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
if (cmp < 0)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
he = hist_entry__new(pair, true);
@@ -2175,7 +2206,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
if (symbol_conf.cumulate_callchain)
memset(he->stat_acc, 0, sizeof(he->stat));
rb_link_node(&he->rb_node_in, parent, p);
- rb_insert_color(&he->rb_node_in, root);
+ rb_insert_color_cached(&he->rb_node_in, root, leftmost);
hists__inc_stats(hists, he);
he->dummy = true;
}
@@ -2184,15 +2215,16 @@ out:
}
static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
- struct rb_root *root,
+ struct rb_root_cached *root,
struct hist_entry *pair)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
struct perf_hpp_fmt *fmt;
+ bool leftmost = true;
- p = &root->rb_node;
+ p = &root->rb_root.rb_node;
while (*p != NULL) {
int64_t cmp = 0;
@@ -2209,14 +2241,16 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
if (cmp < 0)
p = &parent->rb_left;
- else
+ else {
p = &parent->rb_right;
+ leftmost = false;
+ }
}
he = hist_entry__new(pair, true);
if (he) {
rb_link_node(&he->rb_node_in, parent, p);
- rb_insert_color(&he->rb_node_in, root);
+ rb_insert_color_cached(&he->rb_node_in, root, leftmost);
he->dummy = true;
he->hists = hists;
@@ -2233,9 +2267,9 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
struct rb_node *n;
if (hists__has(hists, need_collapse))
- n = hists->entries_collapsed.rb_node;
+ n = hists->entries_collapsed.rb_root.rb_node;
else
- n = hists->entries_in->rb_node;
+ n = hists->entries_in->rb_root.rb_node;
while (n) {
struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
@@ -2252,10 +2286,10 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
return NULL;
}
-static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root,
struct hist_entry *he)
{
- struct rb_node *n = root->rb_node;
+ struct rb_node *n = root->rb_root.rb_node;
while (n) {
struct hist_entry *iter;
@@ -2280,13 +2314,13 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
return NULL;
}
-static void hists__match_hierarchy(struct rb_root *leader_root,
- struct rb_root *other_root)
+static void hists__match_hierarchy(struct rb_root_cached *leader_root,
+ struct rb_root_cached *other_root)
{
struct rb_node *nd;
struct hist_entry *pos, *pair;
- for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct hist_entry, rb_node_in);
pair = hists__find_hierarchy_entry(other_root, pos);
@@ -2302,7 +2336,7 @@ static void hists__match_hierarchy(struct rb_root *leader_root,
*/
void hists__match(struct hists *leader, struct hists *other)
{
- struct rb_root *root;
+ struct rb_root_cached *root;
struct rb_node *nd;
struct hist_entry *pos, *pair;
@@ -2317,7 +2351,7 @@ void hists__match(struct hists *leader, struct hists *other)
else
root = leader->entries_in;
- for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct hist_entry, rb_node_in);
pair = hists__find_entry(other, pos);
@@ -2328,13 +2362,13 @@ void hists__match(struct hists *leader, struct hists *other)
static int hists__link_hierarchy(struct hists *leader_hists,
struct hist_entry *parent,
- struct rb_root *leader_root,
- struct rb_root *other_root)
+ struct rb_root_cached *leader_root,
+ struct rb_root_cached *other_root)
{
struct rb_node *nd;
struct hist_entry *pos, *leader;
- for (nd = rb_first(other_root); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct hist_entry, rb_node_in);
if (hist_entry__has_pairs(pos)) {
@@ -2377,7 +2411,7 @@ static int hists__link_hierarchy(struct hists *leader_hists,
*/
int hists__link(struct hists *leader, struct hists *other)
{
- struct rb_root *root;
+ struct rb_root_cached *root;
struct rb_node *nd;
struct hist_entry *pos, *pair;
@@ -2393,7 +2427,7 @@ int hists__link(struct hists *leader, struct hists *other)
else
root = other->entries_in;
- for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct hist_entry, rb_node_in);
if (!hist_entry__has_pairs(pos)) {
@@ -2566,10 +2600,10 @@ int perf_hist_config(const char *var, const char *value)
int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
{
memset(hists, 0, sizeof(*hists));
- hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
+ hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED;
hists->entries_in = &hists->entries_in_array[0];
- hists->entries_collapsed = RB_ROOT;
- hists->entries = RB_ROOT;
+ hists->entries_collapsed = RB_ROOT_CACHED;
+ hists->entries = RB_ROOT_CACHED;
pthread_mutex_init(&hists->lock, NULL);
hists->socket_filter = -1;
hists->hpp_list = hpp_list;
@@ -2577,14 +2611,14 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
return 0;
}
-static void hists__delete_remaining_entries(struct rb_root *root)
+static void hists__delete_remaining_entries(struct rb_root_cached *root)
{
struct rb_node *node;
struct hist_entry *he;
- while (!RB_EMPTY_ROOT(root)) {
- node = rb_first(root);
- rb_erase(node, root);
+ while (!RB_EMPTY_ROOT(&root->rb_root)) {
+ node = rb_first_cached(root);
+ rb_erase_cached(node, root);
he = rb_entry(node, struct hist_entry, rb_node_in);
hist_entry__delete(he);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 3badd7f1e1b8..4af27fbab24f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -2,9 +2,9 @@
#ifndef __PERF_HIST_H
#define __PERF_HIST_H
+#include <linux/rbtree.h>
#include <linux/types.h>
#include <pthread.h>
-#include "callchain.h"
#include "evsel.h"
#include "header.h"
#include "color.h"
@@ -13,6 +13,9 @@
struct hist_entry;
struct hist_entry_ops;
struct addr_location;
+struct map_symbol;
+struct mem_info;
+struct branch_info;
struct symbol;
enum hist_filter {
@@ -62,6 +65,7 @@ enum hist_column {
HISTC_TRACE,
HISTC_SYM_SIZE,
HISTC_DSO_SIZE,
+ HISTC_SYMBOL_IPC,
HISTC_NR_COLS, /* Last entry */
};
@@ -69,10 +73,10 @@ struct thread;
struct dso;
struct hists {
- struct rb_root entries_in_array[2];
- struct rb_root *entries_in;
- struct rb_root entries;
- struct rb_root entries_collapsed;
+ struct rb_root_cached entries_in_array[2];
+ struct rb_root_cached *entries_in;
+ struct rb_root_cached entries;
+ struct rb_root_cached entries_collapsed;
u64 nr_entries;
u64 nr_non_filtered_entries;
u64 callchain_period;
@@ -159,8 +163,10 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
struct perf_hpp_fmt *fmt, int printed);
void hist_entry__delete(struct hist_entry *he);
-typedef int (*hists__resort_cb_t)(struct hist_entry *he);
+typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
+void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg);
void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
void hists__output_resort(struct hists *hists, struct ui_progress *prog);
void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
@@ -229,7 +235,7 @@ static __pure inline bool hists__has_callchains(struct hists *hists)
int hists__init(void);
int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
-struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
+struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists);
struct perf_hpp {
char *buf;
diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h
index 6a6f4b990547..548100315710 100644
--- a/tools/perf/util/include/asm/uaccess.h
+++ b/tools/perf/util/include/asm/uaccess.h
@@ -10,6 +10,6 @@
#define get_user __get_user
-#define access_ok(type, addr, size) 1
+#define access_ok(addr, size) 1
#endif
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 7f0c83b6332b..0c0180c67574 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -27,6 +27,8 @@
#include "evsel.h"
#include "evlist.h"
#include "machine.h"
+#include "map.h"
+#include "symbol.h"
#include "session.h"
#include "util.h"
#include "thread.h"
@@ -142,7 +144,7 @@ static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
- sample->tid, 0, "Lost trace data");
+ sample->tid, 0, "Lost trace data", sample->time);
err = perf_session__deliver_synth_event(bts->session, &event, NULL);
if (err)
@@ -269,6 +271,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
return 0;
}
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+ return machine__kernel_ip(bts->machine, ip) ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
struct branch *branch)
{
@@ -281,12 +290,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
bts->num_events++ <= bts->synth_opts.initial_skip)
return 0;
- event.sample.header.type = PERF_RECORD_SAMPLE;
- event.sample.header.misc = PERF_RECORD_MISC_USER;
- event.sample.header.size = sizeof(struct perf_event_header);
-
- sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = le64_to_cpu(branch->from);
+ sample.cpumode = intel_bts_cpumode(bts, sample.ip);
sample.pid = btsq->pid;
sample.tid = btsq->tid;
sample.addr = le64_to_cpu(branch->to);
@@ -298,6 +303,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
sample.insn_len = btsq->intel_pt_insn.length;
memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
if (bts->synth_opts.inject) {
event.sample.header.size = bts->branches_event_size;
ret = perf_event__synthesize_sample(&event,
@@ -365,7 +374,7 @@ static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
- "Failed to get instruction");
+ "Failed to get instruction", 0);
err = perf_session__deliver_synth_event(bts->session, &event, NULL);
if (err)
@@ -444,7 +453,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
continue;
intel_bts_get_branch_type(btsq, branch);
if (btsq->bts->synth_opts.thread_stack)
- thread_stack__event(thread, btsq->sample_flags,
+ thread_stack__event(thread, btsq->cpu, btsq->sample_flags,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
@@ -516,7 +525,7 @@ static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
!btsq->bts->synth_opts.thread_stack && thread &&
(!old_buffer || btsq->bts->sampling_mode ||
(btsq->bts->snapshot_mode && !buffer->consecutive)))
- thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+ thread_stack__set_trace_nr(thread, btsq->cpu, buffer->buffer_nr + 1);
err = intel_bts_process_buffer(btsq, buffer, thread);
@@ -910,7 +919,10 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
bts->synth_opts = *session->itrace_synth_opts;
} else {
- itrace_synth_opts__set_default(&bts->synth_opts);
+ /* itrace_synth_opts may be NULL on this path, so guard the read */
+ itrace_synth_opts__set_default(&bts->synth_opts,
+ session->itrace_synth_opts &&
+ session->itrace_synth_opts->default_no_sample);
if (session->itrace_synth_opts)
bts->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 1b704fbea9de..23bf788f84b9 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -1,4 +1,4 @@
-libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 58f6a9ceb590..6e03db142091 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -26,6 +26,7 @@
#include "../cache.h"
#include "../util.h"
+#include "../auxtrace.h"
#include "intel-pt-insn-decoder.h"
#include "intel-pt-pkt-decoder.h"
@@ -867,7 +868,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
ret = intel_pt_get_packet(decoder->buf, decoder->len,
&decoder->packet);
- if (ret == INTEL_PT_NEED_MORE_BYTES &&
+ if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
ret = intel_pt_get_split_packet(decoder);
if (ret < 0)
@@ -1394,7 +1395,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
{
intel_pt_log("ERROR: Buffer overflow\n");
intel_pt_clear_tx_flags(decoder);
- decoder->cbr = 0;
decoder->timestamp_insn_cnt = 0;
decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
decoder->overflow = true;
@@ -1474,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
decoder->have_calc_cyc_to_tsc = false;
intel_pt_calc_cyc_to_tsc(decoder, true);
}
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
@@ -1514,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
decoder->timestamp = timestamp;
decoder->timestamp_insn_cnt = 0;
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
}
/* Walk PSB+ packets when already in sync. */
@@ -2571,6 +2575,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
}
}
+#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
+
+/**
+ * adj_for_padding - adjust overlap to account for padding.
+ * @buf_b: second buffer
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ *
+ * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
+ * accordingly.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts
+ */
+static unsigned char *adj_for_padding(unsigned char *buf_b,
+ unsigned char *buf_a, size_t len_a)
+{
+ unsigned char *p = buf_b - MAX_PADDING;
+ unsigned char *q = buf_a + len_a - MAX_PADDING;
+ int i;
+
+ for (i = MAX_PADDING; i; i--, p++, q++) {
+ if (*p != *q)
+ break;
+ }
+
+ return p;
+}
+
/**
* intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
* using TSC.
@@ -2621,8 +2653,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
/* Same TSC, so buffers are consecutive */
if (!cmp && rem_b >= rem_a) {
+ unsigned char *start;
+
*consecutive = true;
- return buf_b + len_b - (rem_b - rem_a);
+ start = buf_b + len_b - (rem_b - rem_a);
+ return adj_for_padding(start, buf_a, len_a);
}
if (cmp < 0)
return buf_b; /* tsc_a < tsc_b => no overlap */
@@ -2685,7 +2720,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
found = memmem(buf_a, len_a, buf_b, len_a);
if (found) {
*consecutive = true;
- return buf_b + len_a;
+ return adj_for_padding(buf_b + len_a, buf_a, len_a);
}
/* Try again at next PSB in buffer 'a' */
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 54818828023b..1c0e289f01e6 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -180,6 +180,14 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
return 0;
}
+int arch_is_branch(const unsigned char *buf, size_t len, int x86_64)
+{
+ struct intel_pt_insn in;
+ if (intel_pt_get_insn(buf, len, x86_64, &in) < 0)
+ return -1;
+ return in.branch != INTEL_PT_BR_NO_BRANCH;
+}
+
const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
u8 *inbuf, int inlen, int *lenp)
{
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
index e02bc7b166a0..5e64da270f97 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -31,6 +31,11 @@ static FILE *f;
static char log_name[MAX_LOG_NAME];
bool intel_pt_enable_logging;
+void *intel_pt_log_fp(void)
+{
+ return f;
+}
+
void intel_pt_log_enable(void)
{
intel_pt_enable_logging = true;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
index 45b64f93f358..cc084937f701 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -22,6 +22,7 @@
struct intel_pt_pkt;
+void *intel_pt_log_fp(void);
void intel_pt_log_enable(void);
void intel_pt_log_disable(void);
void intel_pt_log_set_name(const char *name);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 48c1d415c6b0..3b497bab4324 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
intel_pt_dump(pt, buf, len);
}
+static void intel_pt_log_event(union perf_event *event)
+{
+ FILE *f = intel_pt_log_fp();
+
+ if (!intel_pt_enable_logging || !f)
+ return;
+
+ perf_event__fprintf(event, f);
+}
+
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
@@ -407,6 +417,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+ return ip >= pt->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
uint64_t *insn_cnt_ptr, uint64_t *ip,
uint64_t to_ip, uint64_t max_insn_cnt,
@@ -429,10 +446,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
if (to_ip && *ip == to_ip)
goto out_no_cache;
- if (*ip >= ptq->pt->kernel_start)
- cpumode = PERF_RECORD_MISC_KERNEL;
- else
- cpumode = PERF_RECORD_MISC_USER;
+ cpumode = intel_pt_cpumode(ptq->pt, *ip);
thread = ptq->thread;
if (!thread) {
@@ -759,7 +773,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
if (pt->synth_opts.callchain) {
size_t sz = sizeof(struct ip_callchain);
- sz += pt->synth_opts.callchain_sz * sizeof(u64);
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
ptq->chain = zalloc(sz);
if (!ptq->chain)
goto out_free;
@@ -1058,15 +1073,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
{
- event->sample.header.type = PERF_RECORD_SAMPLE;
- event->sample.header.misc = PERF_RECORD_MISC_USER;
- event->sample.header.size = sizeof(struct perf_event_header);
-
if (!pt->timeless_decoding)
sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
- sample->cpumode = PERF_RECORD_MISC_USER;
sample->ip = ptq->state->from_ip;
+ sample->cpumode = intel_pt_cpumode(pt, sample->ip);
sample->pid = ptq->pid;
sample->tid = ptq->tid;
sample->addr = ptq->state->to_ip;
@@ -1075,6 +1086,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,
sample->flags = ptq->flags;
sample->insn_len = ptq->insn_len;
memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = sample->cpumode;
+ event->sample.header.size = sizeof(struct perf_event_header);
}
static int intel_pt_inject_event(union perf_event *event,
@@ -1159,8 +1174,9 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
intel_pt_prep_b_sample(pt, ptq, event, sample);
if (pt->synth_opts.callchain) {
- thread_stack__sample(ptq->thread, ptq->chain,
- pt->synth_opts.callchain_sz, sample->ip);
+ thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
+ pt->synth_opts.callchain_sz + 1,
+ sample->ip, pt->kernel_start);
sample->callchain = ptq->chain;
}
@@ -1395,7 +1411,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
- pid_t pid, pid_t tid, u64 ip)
+ pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
union perf_event event;
char msg[MAX_AUXTRACE_ERROR_MSG];
@@ -1404,7 +1420,7 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
- code, cpu, pid, tid, ip, msg);
+ code, cpu, pid, tid, ip, msg, timestamp);
err = perf_session__deliver_synth_event(pt->session, &event, NULL);
if (err)
@@ -1414,6 +1430,18 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
return err;
}
+static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
+ const struct intel_pt_state *state)
+{
+ struct intel_pt *pt = ptq->pt;
+ u64 tm = ptq->timestamp;
+
+ tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
+
+ return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
+ ptq->tid, state->from_ip, tm);
+}
+
static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
struct auxtrace_queue *queue;
@@ -1510,11 +1538,11 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return 0;
if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
- thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
state->to_ip, ptq->insn_len,
state->trace_nr);
else
- thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+ thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
@@ -1660,10 +1688,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
intel_pt_next_tid(pt, ptq);
}
if (pt->synth_opts.errors) {
- err = intel_pt_synth_error(pt, state->err,
- ptq->cpu, ptq->pid,
- ptq->tid,
- state->from_ip);
+ err = intel_ptq_synth_error(ptq, state);
if (err)
return err;
}
@@ -1788,7 +1813,7 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
- sample->pid, sample->tid, 0);
+ sample->pid, sample->tid, 0, sample->time);
}
static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
@@ -2004,9 +2029,9 @@ static int intel_pt_process_event(struct perf_session *session,
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample);
- intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
- perf_event__name(event->header.type), event->header.type,
- sample->cpu, sample->time, timestamp);
+ intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+ event->header.type, sample->cpu, sample->time, timestamp);
+ intel_pt_log_event(event);
return err;
}
@@ -2559,7 +2584,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
pt->synth_opts = *session->itrace_synth_opts;
} else {
- itrace_synth_opts__set_default(&pt->synth_opts);
+ /* itrace_synth_opts may be NULL on this path, so guard the read */
+ itrace_synth_opts__set_default(&pt->synth_opts,
+ session->itrace_synth_opts &&
+ session->itrace_synth_opts->default_no_sample);
if (use_browser != -1) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index 85bab8735fa9..5c19ee001299 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -45,7 +45,7 @@ static inline unsigned int intlist__nr_entries(const struct intlist *ilist)
/* For intlist iteration */
static inline struct int_node *intlist__first(struct intlist *ilist)
{
- struct rb_node *rn = rb_first(&ilist->rblist.entries);
+ struct rb_node *rn = rb_first_cached(&ilist->rblist.entries);
return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
}
static inline struct int_node *intlist__next(struct int_node *in)
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index a1863000e972..eda28d3570bc 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -2,6 +2,7 @@
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <errno.h>
+#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -38,7 +39,7 @@ struct jit_buf_desc {
uint64_t sample_type;
size_t bufsize;
FILE *in;
- bool needs_bswap; /* handles cross-endianess */
+ bool needs_bswap; /* handles cross-endianness */
bool use_arch_timestamp;
void *debug_data;
void *unwinding_data;
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 7b1f06567521..1403dec189b4 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -3,12 +3,13 @@
#define __PERF_KVM_STAT_H
#include "../perf.h"
-#include "evsel.h"
-#include "evlist.h"
-#include "session.h"
#include "tool.h"
#include "stat.h"
+struct perf_evsel;
+struct perf_evlist;
+struct perf_session;
+
struct event_key {
#define INVALID_KEY (~0ULL)
u64 key;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcb..61959aba7e27 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -10,6 +10,7 @@
#include "hist.h"
#include "machine.h"
#include "map.h"
+#include "symbol.h"
#include "sort.h"
#include "strlist.h"
#include "thread.h"
@@ -21,6 +22,7 @@
#include "unwind.h"
#include "linux/hash.h"
#include "asm/bug.h"
+#include "bpf-event.h"
#include "sane_ctype.h"
#include <symbol/kallsyms.h>
@@ -41,7 +43,7 @@ static void machine__threads_init(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
- threads->entries = RB_ROOT;
+ threads->entries = RB_ROOT_CACHED;
init_rwsem(&threads->lock);
threads->nr = 0;
INIT_LIST_HEAD(&threads->dead);
@@ -137,7 +139,7 @@ struct machine *machine__new_kallsyms(void)
struct machine *machine = machine__new_host();
/*
* FIXME:
- * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely
+ * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
* ask for not using the kcore parsing code, once this one is fixed
* to create a map per module.
*/
@@ -179,7 +181,7 @@ void machine__delete_threads(struct machine *machine)
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
struct threads *threads = &machine->threads[i];
down_write(&threads->lock);
- nd = rb_first(&threads->entries);
+ nd = rb_first_cached(&threads->entries);
while (nd) {
struct thread *t = rb_entry(nd, struct thread, rb_node);
@@ -222,7 +224,7 @@ void machine__delete(struct machine *machine)
void machines__init(struct machines *machines)
{
machine__init(&machines->host, "", HOST_KERNEL_ID);
- machines->guests = RB_ROOT;
+ machines->guests = RB_ROOT_CACHED;
}
void machines__exit(struct machines *machines)
@@ -234,9 +236,10 @@ void machines__exit(struct machines *machines)
struct machine *machines__add(struct machines *machines, pid_t pid,
const char *root_dir)
{
- struct rb_node **p = &machines->guests.rb_node;
+ struct rb_node **p = &machines->guests.rb_root.rb_node;
struct rb_node *parent = NULL;
struct machine *pos, *machine = malloc(sizeof(*machine));
+ bool leftmost = true;
if (machine == NULL)
return NULL;
@@ -251,12 +254,14 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
pos = rb_entry(parent, struct machine, rb_node);
if (pid < pos->pid)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&machine->rb_node, parent, p);
- rb_insert_color(&machine->rb_node, &machines->guests);
+ rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);
return machine;
}
@@ -267,7 +272,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec)
machines->host.comm_exec = comm_exec;
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *machine = rb_entry(nd, struct machine, rb_node);
machine->comm_exec = comm_exec;
@@ -276,7 +281,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec)
struct machine *machines__find(struct machines *machines, pid_t pid)
{
- struct rb_node **p = &machines->guests.rb_node;
+ struct rb_node **p = &machines->guests.rb_root.rb_node;
struct rb_node *parent = NULL;
struct machine *machine;
struct machine *default_machine = NULL;
@@ -339,7 +344,7 @@ void machines__process_guests(struct machines *machines,
{
struct rb_node *nd;
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
process(pos, data);
}
@@ -352,7 +357,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
machines->host.id_hdr_size = id_hdr_size;
- for (node = rb_first(&machines->guests); node; node = rb_next(node)) {
+ for (node = rb_first_cached(&machines->guests); node;
+ node = rb_next(node)) {
machine = rb_entry(node, struct machine, rb_node);
machine->id_hdr_size = id_hdr_size;
}
@@ -465,9 +471,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
pid_t pid, pid_t tid,
bool create)
{
- struct rb_node **p = &threads->entries.rb_node;
+ struct rb_node **p = &threads->entries.rb_root.rb_node;
struct rb_node *parent = NULL;
struct thread *th;
+ bool leftmost = true;
th = threads__get_last_match(threads, machine, pid, tid);
if (th)
@@ -485,8 +492,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
if (tid < th->tid)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
if (!create)
@@ -495,7 +504,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
th = thread__new(pid, tid);
if (th != NULL) {
rb_link_node(&th->rb_node, parent, p);
- rb_insert_color(&th->rb_node, &threads->entries);
+ rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost);
/*
* We have to initialize map_groups separately
@@ -506,7 +515,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
* leader and that would screwed the rb tree.
*/
if (thread__init_map_groups(th, machine)) {
- rb_erase_init(&th->rb_node, &threads->entries);
+ rb_erase_cached(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
thread__put(th);
return NULL;
@@ -681,6 +690,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static int machine__process_ksymbol_register(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct symbol *sym;
+ struct map *map;
+
+ map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
+ if (!map) {
+ map = dso__new_map(event->ksymbol_event.name);
+ if (!map)
+ return -ENOMEM;
+
+ map->start = event->ksymbol_event.addr;
+ map->pgoff = map->start;
+ map->end = map->start + event->ksymbol_event.len;
+ map_groups__insert(&machine->kmaps, map);
+ }
+
+ sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
+ 0, 0, event->ksymbol_event.name);
+ if (!sym)
+ return -ENOMEM;
+ dso__insert_symbol(map->dso, sym);
+ return 0;
+}
+
+/*
+ * Remove from machine->kmaps the map that covers the address carried by a
+ * ksymbol event, if such a map exists. Always returns 0.
+ */
+static int machine__process_ksymbol_unregister(struct machine *machine,
+					       union perf_event *event,
+					       struct perf_sample *sample __maybe_unused)
+{
+	struct map_groups *kmaps = &machine->kmaps;
+	struct map *found = map_groups__find(kmaps, event->ksymbol_event.addr);
+
+	if (found != NULL)
+		map_groups__remove(kmaps, found);
+
+	return 0;
+}
+
+/*
+ * Handle a ksymbol event: print it when dump_trace is set, then hand it to
+ * the unregister handler if PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER is set in
+ * its flags, or to the register handler otherwise.
+ *
+ * Returns whatever the selected handler returns.
+ */
+int machine__process_ksymbol(struct machine *machine __maybe_unused,
+			     union perf_event *event,
+			     struct perf_sample *sample)
+{
+	bool unregister;
+
+	if (dump_trace)
+		perf_event__fprintf_ksymbol(event, stdout);
+
+	unregister = event->ksymbol_event.flags &
+		     PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER;
+	if (!unregister)
+		return machine__process_ksymbol_register(machine, event, sample);
+
+	return machine__process_ksymbol_unregister(machine, event, sample);
+}
+
static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
{
const char *dup_filename;
@@ -744,7 +806,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
struct rb_node *nd;
size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret += __dsos__fprintf(&pos->dsos.head, fp);
}
@@ -764,7 +826,7 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
struct rb_node *nd;
size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *pos = rb_entry(nd, struct machine, rb_node);
ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
}
@@ -804,7 +866,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
ret = fprintf(fp, "Threads: %u\n", threads->nr);
- for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&threads->entries); nd;
+ nd = rb_next(nd)) {
struct thread *pos = rb_entry(nd, struct thread, rb_node);
ret += thread__fprintf(pos, fp);
@@ -1107,7 +1170,7 @@ failure:
void machines__destroy_kernel_maps(struct machines *machines)
{
- struct rb_node *next = rb_first(&machines->guests);
+ struct rb_node *next = rb_first_cached(&machines->guests);
machine__destroy_kernel_maps(&machines->host);
@@ -1115,7 +1178,7 @@ void machines__destroy_kernel_maps(struct machines *machines)
struct machine *pos = rb_entry(next, struct machine, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, &machines->guests);
+ rb_erase_cached(&pos->rb_node, &machines->guests);
machine__delete(pos);
}
}
@@ -1680,7 +1743,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
BUG_ON(refcount_read(&th->refcnt) == 0);
if (lock)
down_write(&threads->lock);
- rb_erase_init(&th->rb_node, &threads->entries);
+ rb_erase_cached(&th->rb_node, &threads->entries);
RB_CLEAR_NODE(&th->rb_node);
--threads->nr;
/*
@@ -1708,6 +1771,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
+ bool do_maps_clone = true;
int err = 0;
if (dump_trace)
@@ -1736,9 +1800,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);
+ /*
+ * When synthesizing FORK events, we are trying to create thread
+ * objects for the already running tasks on the machine.
+ *
+ * Normally, for a kernel FORK event, we want to clone the parent's
+ * maps because that is what the kernel just did.
+ *
+ * But when synthesizing, this should not be done. If we do, we end up
+ * with overlapping maps as we process the synthesized MMAP2 events that
+ * get delivered shortly thereafter.
+ *
+ * Use the FORK event misc flags in an internal way to signal this
+ * situation, so we can elide the map clone when appropriate.
+ */
+ if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC)
+ do_maps_clone = false;
if (thread == NULL || parent == NULL ||
- thread__fork(thread, parent, sample->time) < 0) {
+ thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
err = -1;
}
@@ -1795,6 +1875,10 @@ int machine__process_event(struct machine *machine, union perf_event *event,
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
ret = machine__process_switch_event(machine, event); break;
+ case PERF_RECORD_KSYMBOL:
+ ret = machine__process_ksymbol(machine, event, sample); break;
+ case PERF_RECORD_BPF_EVENT:
+ ret = machine__process_bpf_event(machine, event, sample); break;
default:
ret = -1;
break;
@@ -1988,7 +2072,7 @@ static void save_iterations(struct iterations *iter,
{
int i;
- iter->nr_loop_iter = nr;
+ iter->nr_loop_iter++;
iter->cycles = 0;
for (i = 0; i < nr; i++)
@@ -2140,6 +2224,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
return 0;
}
+/*
+ * Scan @chain backwards, starting at the entry just before index @ent, for
+ * the nearest entry whose ip is >= PERF_CONTEXT_MAX and pass that ip to
+ * add_callchain_ip() together with @cpumode.
+ *
+ * Returns the add_callchain_ip() result, or 0 when no such entry precedes
+ * @ent.
+ */
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+			     struct callchain_cursor *cursor,
+			     struct symbol **parent,
+			     struct addr_location *root_al,
+			     u8 *cpumode, int ent)
+{
+	int i;
+
+	for (i = ent - 1; i >= 0; i--) {
+		u64 ip = chain->ips[i];
+
+		if (ip < PERF_CONTEXT_MAX)
+			continue;
+
+		/* Only the closest preceding match is used. */
+		return add_callchain_ip(thread, cursor, parent,
+					root_al, cpumode, ip,
+					false, NULL, NULL, 0);
+	}
+
+	return 0;
+}
+
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct perf_evsel *evsel,
@@ -2246,6 +2351,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
}
check_calls:
+ if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
for (i = first_call, nr_entries = 0;
i < chain_nr && nr_entries < max_stack; i++) {
u64 ip;
@@ -2260,9 +2371,15 @@ check_calls:
continue;
#endif
ip = chain->ips[j];
-
if (ip < PERF_CONTEXT_MAX)
++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
@@ -2403,7 +2520,8 @@ int machine__for_each_thread(struct machine *machine,
for (i = 0; i < THREADS__TABLE_SIZE; i++) {
threads = &machine->threads[i];
- for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&threads->entries); nd;
+ nd = rb_next(nd)) {
thread = rb_entry(nd, struct thread, rb_node);
rc = fn(thread, priv);
if (rc != 0)
@@ -2430,7 +2548,7 @@ int machines__for_each_thread(struct machines *machines,
if (rc != 0)
return rc;
- for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
struct machine *machine = rb_entry(nd, struct machine, rb_node);
rc = machine__for_each_thread(machine, fn, priv);
@@ -2443,15 +2561,13 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
- unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
if (target__has_task(target))
- return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
+ return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
else if (target__has_cpu(target))
return perf_event__synthesize_threads(tool, process,
machine, data_mmap,
- proc_map_timeout,
nr_threads_synthesize);
/* command specified */
return 0;
@@ -2542,6 +2658,33 @@ int machine__get_kernel_start(struct machine *machine)
return err;
}
+/*
+ * Pick the cpumode to use for @addr.
+ *
+ * When machine->single_address_space is not set, @cpumode is returned as is.
+ * Otherwise a KERNEL/USER or GUEST_KERNEL/GUEST_USER @cpumode is replaced by
+ * the kernel or user flavour of the same pair, depending on what
+ * machine__kernel_ip() reports for @addr. Any other @cpumode is returned
+ * unchanged.
+ */
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+	bool in_kernel;
+
+	if (!machine->single_address_space)
+		return cpumode;
+
+	in_kernel = machine__kernel_ip(machine, addr);
+
+	if (cpumode == PERF_RECORD_MISC_KERNEL ||
+	    cpumode == PERF_RECORD_MISC_USER)
+		return in_kernel ? PERF_RECORD_MISC_KERNEL :
+				   PERF_RECORD_MISC_USER;
+
+	if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+	    cpumode == PERF_RECORD_MISC_GUEST_USER)
+		return in_kernel ? PERF_RECORD_MISC_GUEST_KERNEL :
+				   PERF_RECORD_MISC_GUEST_USER;
+
+	return cpumode;
+}
+
struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
{
return dsos__findnew(&machine->dsos, filename);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index d856b85862e2..f70ab98a7bde 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -4,7 +4,7 @@
#include <sys/types.h>
#include <linux/rbtree.h>
-#include "map.h"
+#include "map_groups.h"
#include "dso.h"
#include "event.h"
#include "rwsem.h"
@@ -29,11 +29,11 @@ struct vdso_info;
#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
struct threads {
- struct rb_root entries;
- struct rw_semaphore lock;
- unsigned int nr;
- struct list_head dead;
- struct thread *last_match;
+ struct rb_root_cached entries;
+ struct rw_semaphore lock;
+ unsigned int nr;
+ struct list_head dead;
+ struct thread *last_match;
};
struct machine {
@@ -42,6 +42,7 @@ struct machine {
u16 id_hdr_size;
bool comm_exec;
bool kptr_restrict_warned;
+ bool single_address_space;
char *root_dir;
char *mmap_name;
struct threads threads[THREADS__TABLE_SIZE];
@@ -99,6 +100,8 @@ static inline bool machine__kernel_ip(struct machine *machine, u64 ip)
return ip >= kernel_start;
}
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
+
struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid);
struct comm *machine__thread_exec_comm(struct machine *machine,
@@ -127,6 +130,9 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
+int machine__process_ksymbol(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
@@ -134,7 +140,7 @@ typedef void (*machine__process_t)(struct machine *machine, void *data);
struct machines {
struct machine host;
- struct rb_root guests;
+ struct rb_root_cached guests;
};
void machines__init(struct machines *machines);
@@ -247,17 +253,14 @@ int machines__for_each_thread(struct machines *machines,
int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
struct target *target, struct thread_map *threads,
perf_event__handler_t process, bool data_mmap,
- unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize);
static inline
int machine__synthesize_threads(struct machine *machine, struct target *target,
struct thread_map *threads, bool data_mmap,
- unsigned int proc_map_timeout,
unsigned int nr_threads_synthesize)
{
return __machine__synthesize_threads(machine, NULL, target, threads,
perf_event__process, data_mmap,
- proc_map_timeout,
nr_threads_synthesize);
}
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 354e54550d2b..fbeb0c6efaa6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -19,8 +19,10 @@
#include "srcline.h"
#include "namespaces.h"
#include "unwind.h"
+#include "srccode.h"
static void __maps__insert(struct maps *maps, struct map *map);
+static void __maps__insert_name(struct maps *maps, struct map *map);
static inline int is_anon_memory(const char *filename, u32 flags)
{
@@ -284,8 +286,8 @@ void map__put(struct map *map)
void map__fixup_start(struct map *map)
{
- struct rb_root *symbols = &map->dso->symbols;
- struct rb_node *nd = rb_first(symbols);
+ struct rb_root_cached *symbols = &map->dso->symbols;
+ struct rb_node *nd = rb_first_cached(symbols);
if (nd != NULL) {
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
map->start = sym->start;
@@ -294,8 +296,8 @@ void map__fixup_start(struct map *map)
void map__fixup_end(struct map *map)
{
- struct rb_root *symbols = &map->dso->symbols;
- struct rb_node *nd = rb_last(symbols);
+ struct rb_root_cached *symbols = &map->dso->symbols;
+ struct rb_node *nd = rb_last(&symbols->rb_root);
if (nd != NULL) {
struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
map->end = sym->end;
@@ -420,6 +422,54 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
return ret;
}
+/**
+ * map__fprintf_srccode - print the source line that corresponds to an address
+ * @map: map the address belongs to; nothing is printed if it or its dso is NULL
+ * @addr: address to look up (converted with map__rip_2objdump())
+ * @fp: stream to print to
+ * @state: optional record of the last file/line printed, used to suppress
+ *         printing the same line twice in a row; may be NULL
+ *
+ * Return: the fprintf() result for the printed line, or 0 when nothing was
+ * printed.
+ */
+int map__fprintf_srccode(struct map *map, u64 addr,
+			 FILE *fp,
+			 struct srccode_state *state)
+{
+	char *srcfile;
+	int ret = 0;
+	unsigned line;
+	int len;
+	char *srccode;
+
+	if (!map || !map->dso)
+		return 0;
+	srcfile = get_srcline_split(map->dso,
+				    map__rip_2objdump(map, addr),
+				    &line);
+	if (!srcfile)
+		return 0;
+
+	/* Avoid redundant printing */
+	if (state &&
+	    state->srcfile &&
+	    !strcmp(state->srcfile, srcfile) &&
+	    state->line == line)
+		goto out_free_line;
+
+	srccode = find_sourceline(srcfile, line, &len);
+	if (!srccode)
+		goto out_free_line;
+
+	ret = fprintf(fp, "|%-8d %.*s", line, len, srccode);
+	if (state) {
+		/*
+		 * @state takes ownership of srcfile (srccode_state_free()
+		 * releases it), so drop the string it held before.
+		 */
+		free(state->srcfile);
+		state->srcfile = srcfile;
+		state->line = line;
+		return ret;
+	}
+
+	/* No state to hand srcfile over to: release it here. */
+out_free_line:
+	free(srcfile);
+	return ret;
+}
+
+
+void srccode_state_free(struct srccode_state *state)
+{
+ zfree(&state->srcfile);
+ state->line = 0;
+}
+
/**
* map__rip_2objdump - convert symbol start address to objdump address.
* @map: memory map
@@ -496,6 +546,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
static void maps__init(struct maps *maps)
{
maps->entries = RB_ROOT;
+ maps->names = RB_ROOT;
init_rwsem(&maps->lock);
}
@@ -506,6 +557,12 @@ void map_groups__init(struct map_groups *mg, struct machine *machine)
refcount_set(&mg->refcnt, 1);
}
+void map_groups__insert(struct map_groups *mg, struct map *map)
+{
+ maps__insert(&mg->maps, map);
+ map->groups = mg;
+}
+
static void __maps__purge(struct maps *maps)
{
struct rb_root *root = &maps->entries;
@@ -664,6 +721,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
static void __map_groups__insert(struct map_groups *mg, struct map *map)
{
__maps__insert(&mg->maps, map);
+ __maps__insert_name(&mg->maps, map);
map->groups = mg;
}
@@ -824,10 +882,34 @@ static void __maps__insert(struct maps *maps, struct map *map)
map__get(map);
}
+/*
+ * Link @map into the maps->names rbtree, keyed by the short_name of its dso.
+ * If the tree already holds a map with an equal short_name, @map is not
+ * inserted. map__get() is called on @map only when it is inserted.
+ */
+static void __maps__insert_name(struct maps *maps, struct map *map)
+{
+	struct rb_node **link = &maps->names.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*link != NULL) {
+		struct map *cur;
+		int cmp;
+
+		parent = *link;
+		cur = rb_entry(parent, struct map, rb_node_name);
+		cmp = strcmp(cur->dso->short_name, map->dso->short_name);
+		if (cmp == 0)
+			return;
+
+		link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+	}
+
+	rb_link_node(&map->rb_node_name, parent, link);
+	rb_insert_color(&map->rb_node_name, &maps->names);
+	map__get(map);
+}
+
void maps__insert(struct maps *maps, struct map *map)
{
down_write(&maps->lock);
__maps__insert(maps, map);
+ __maps__insert_name(maps, map);
up_write(&maps->lock);
}
@@ -846,19 +928,18 @@ void maps__remove(struct maps *maps, struct map *map)
struct map *maps__find(struct maps *maps, u64 ip)
{
- struct rb_node **p, *parent = NULL;
+ struct rb_node *p;
struct map *m;
down_read(&maps->lock);
- p = &maps->entries.rb_node;
- while (*p != NULL) {
- parent = *p;
- m = rb_entry(parent, struct map, rb_node);
+ p = maps->entries.rb_node;
+ while (p != NULL) {
+ m = rb_entry(p, struct map, rb_node);
if (ip < m->start)
- p = &(*p)->rb_left;
+ p = p->rb_left;
else if (ip >= m->end)
- p = &(*p)->rb_right;
+ p = p->rb_right;
else
goto out;
}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index e0f327b51e66..0e20749f2c55 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -6,12 +6,10 @@
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/rbtree.h>
-#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <linux/types.h>
-#include "rwsem.h"
struct dso;
struct ip_callchain;
@@ -25,6 +23,7 @@ struct map {
struct rb_node rb_node;
struct list_head node;
};
+ struct rb_node rb_node_name;
u64 start;
u64 end;
bool erange_warned;
@@ -47,37 +46,7 @@ struct map {
refcount_t refcnt;
};
-#define KMAP_NAME_LEN 256
-
-struct kmap {
- struct ref_reloc_sym *ref_reloc_sym;
- struct map_groups *kmaps;
- char name[KMAP_NAME_LEN];
-};
-
-struct maps {
- struct rb_root entries;
- struct rw_semaphore lock;
-};
-
-struct map_groups {
- struct maps maps;
- struct machine *machine;
- refcount_t refcnt;
-};
-
-struct map_groups *map_groups__new(struct machine *machine);
-void map_groups__delete(struct map_groups *mg);
-bool map_groups__empty(struct map_groups *mg);
-
-static inline struct map_groups *map_groups__get(struct map_groups *mg)
-{
- if (mg)
- refcount_inc(&mg->refcnt);
- return mg;
-}
-
-void map_groups__put(struct map_groups *mg);
+struct kmap;
struct kmap *__map__kmap(struct map *map);
struct kmap *map__kmap(struct map *map);
@@ -172,6 +141,11 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
FILE *fp);
+struct srccode_state;
+
+int map__fprintf_srccode(struct map *map, u64 addr,
+ FILE *fp, struct srccode_state *state);
+
int map__load(struct map *map);
struct symbol *map__find_symbol(struct map *map, u64 addr);
struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
@@ -180,61 +154,9 @@ void map__fixup_end(struct map *map);
void map__reloc_vmlinux(struct map *map);
-void maps__insert(struct maps *maps, struct map *map);
-void maps__remove(struct maps *maps, struct map *map);
-struct map *maps__find(struct maps *maps, u64 addr);
-struct map *maps__first(struct maps *maps);
-struct map *map__next(struct map *map);
-struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
- struct map **mapp);
-void map_groups__init(struct map_groups *mg, struct machine *machine);
-void map_groups__exit(struct map_groups *mg);
-int map_groups__clone(struct thread *thread,
- struct map_groups *parent);
-size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
-
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
u64 addr);
-static inline void map_groups__insert(struct map_groups *mg, struct map *map)
-{
- maps__insert(&mg->maps, map);
- map->groups = mg;
-}
-
-static inline void map_groups__remove(struct map_groups *mg, struct map *map)
-{
- maps__remove(&mg->maps, map);
-}
-
-static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
-{
- return maps__find(&mg->maps, addr);
-}
-
-struct map *map_groups__first(struct map_groups *mg);
-
-static inline struct map *map_groups__next(struct map *map)
-{
- return map__next(map);
-}
-
-struct symbol *map_groups__find_symbol(struct map_groups *mg,
- u64 addr, struct map **mapp);
-
-struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
- const char *name,
- struct map **mapp);
-
-struct addr_map_symbol;
-
-int map_groups__find_ams(struct addr_map_symbol *ams);
-
-int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
- FILE *fp);
-
-struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
-
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h
new file mode 100644
index 000000000000..4dcda33e0fdf
--- /dev/null
+++ b/tools/perf/util/map_groups.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAP_GROUPS_H
+#define __PERF_MAP_GROUPS_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include "rwsem.h"
+
+struct ref_reloc_sym;
+struct machine;
+struct map;
+struct thread;
+
+struct maps {
+ struct rb_root entries;
+ struct rb_root names;
+ struct rw_semaphore lock;
+};
+
+void maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
+struct map *map__next(struct map *map);
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp);
+
+struct map_groups {
+ struct maps maps;
+ struct machine *machine;
+ refcount_t refcnt;
+};
+
+#define KMAP_NAME_LEN 256
+
+struct kmap {
+ struct ref_reloc_sym *ref_reloc_sym;
+ struct map_groups *kmaps;
+ char name[KMAP_NAME_LEN];
+};
+
+struct map_groups *map_groups__new(struct machine *machine);
+void map_groups__delete(struct map_groups *mg);
+bool map_groups__empty(struct map_groups *mg);
+
+static inline struct map_groups *map_groups__get(struct map_groups *mg)
+{
+ if (mg)
+ refcount_inc(&mg->refcnt);
+ return mg;
+}
+
+void map_groups__put(struct map_groups *mg);
+void map_groups__init(struct map_groups *mg, struct machine *machine);
+void map_groups__exit(struct map_groups *mg);
+int map_groups__clone(struct thread *thread, struct map_groups *parent);
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
+
+void map_groups__insert(struct map_groups *mg, struct map *map);
+
+static inline void map_groups__remove(struct map_groups *mg, struct map *map)
+{
+ maps__remove(&mg->maps, map);
+}
+
+static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
+{
+ return maps__find(&mg->maps, addr);
+}
+
+struct map *map_groups__first(struct map_groups *mg);
+
+static inline struct map *map_groups__next(struct map *map)
+{
+ return map__next(map);
+}
+
+struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp);
+struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp);
+
+struct addr_map_symbol;
+
+int map_groups__find_ams(struct addr_map_symbol *ams);
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp);
+
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
+
+#endif // __PERF_MAP_GROUPS_H
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
new file mode 100644
index 000000000000..5a1aed9f6bb4
--- /dev/null
+++ b/tools/perf/util/map_symbol.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAP_SYMBOL
+#define __PERF_MAP_SYMBOL 1
+
+#include <linux/types.h>
+
+struct map;
+struct symbol;
+
+struct map_symbol {
+ struct map *map;
+ struct symbol *sym;
+};
+
+struct addr_map_symbol {
+ struct map *map;
+ struct symbol *sym;
+ u64 addr;
+ u64 al_addr;
+ u64 phys_addr;
+};
+#endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 93f74d8d3cdd..42c3e5a229d2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
static char mem_loads_name[100];
static bool mem_loads_name__init;
-char *perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i)
{
if (i == PERF_MEM_EVENTS__LOAD) {
if (!mem_loads_name__init) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index a28f9b5cc4ff..b8d864ed4afe 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
}
void metricgroup__print(bool metrics, bool metricgroups, char *filter,
- bool raw)
+ bool raw, bool details)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
@@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
if (asprintf(&s, "%s\n%*s%s]",
pe->metric_name, 8, "[", pe->desc) < 0)
return;
+
+ if (details) {
+ if (asprintf(&s, "%s\n%*s%s]",
+ s, 8, "[", pe->metric_expr) < 0)
+ return;
+ }
}
if (!s)
@@ -352,7 +358,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
else if (metrics && !raw)
printf("\nMetrics:\n\n");
- for (node = rb_first(&groups.entries); node; node = next) {
+ for (node = rb_first_cached(&groups.entries); node; node = next) {
struct mep *me = container_of(node, struct mep, nd);
if (metricgroups)
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 8a155dba0581..5c52097a5c63 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt,
const char *str,
struct rblist *metric_events);
-void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
+void metricgroup__print(bool metrics, bool groups, char *filter,
+ bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
#endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index cdb95b3a1213..cdc7740fc181 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -10,6 +10,9 @@
#include <sys/mman.h>
#include <inttypes.h>
#include <asm/bug.h>
+#ifdef HAVE_LIBNUMA_SUPPORT
+#include <numaif.h>
+#endif
#include "debug.h"
#include "event.h"
#include "mmap.h"
@@ -153,8 +156,224 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
{
}
+#ifdef HAVE_AIO_SUPPORT
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+/*
+ * Back AIO buffer slot @idx of @map with a private anonymous mapping of
+ * perf_mmap__mmap_len(map) bytes. On failure the slot is set to NULL.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+{
+	void *buf = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+			 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+
+	if (buf == MAP_FAILED) {
+		map->aio.data[idx] = NULL;
+		return -1;
+	}
+
+	map->aio.data[idx] = buf;
+	return 0;
+}
+
+/* Unmap AIO buffer slot @idx of @map, if it is set, and reset it to NULL. */
+static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+{
+	void *buf = map->aio.data[idx];
+
+	if (buf == NULL)
+		return;
+
+	munmap(buf, perf_mmap__mmap_len(map));
+	map->aio.data[idx] = NULL;
+}
+
+/*
+ * Bind AIO buffer slot @idx of @map to the NUMA node that cpu__get_node()
+ * reports for @cpu. Nothing is done when @affinity is PERF_AFFINITY_SYS or
+ * when cpu__max_node() is not greater than 1.
+ *
+ * Returns 0 on success or when no binding is needed, -1 on failure.
+ */
+static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity)
+{
+	const size_t bits_per_long = 8 * sizeof(unsigned long);
+	unsigned long *node_mask;
+	size_t mmap_len;
+	void *data;
+	int node, err = 0;
+
+	if (affinity == PERF_AFFINITY_SYS || cpu__max_node() <= 1)
+		return 0;
+
+	data = map->aio.data[idx];
+	mmap_len = perf_mmap__mmap_len(map);
+	node = cpu__get_node(cpu);
+	if (node < 0) {
+		/* A negative node cannot be turned into a mask bit. */
+		pr_err("Failed to bind [%p-%p] AIO buffer: no node known for cpu %d\n",
+		       data, data + mmap_len, cpu);
+		return -1;
+	}
+
+	/* Size the mask by the node number so any node fits, not just < word size. */
+	node_mask = calloc(node / bits_per_long + 1, sizeof(*node_mask));
+	if (!node_mask) {
+		pr_err("Failed to allocate node mask for mbind: error %m\n");
+		return -1;
+	}
+	node_mask[node / bits_per_long] |= 1UL << (node % bits_per_long);
+
+	/*
+	 * The mask has to span bit 'node', i.e. node + 1 bits. One more is
+	 * passed as maxnode so that the bit for 'node' itself is covered.
+	 * NOTE(review): the extra +1 follows the kernel's handling of
+	 * maxnode - confirm against mbind(2).
+	 */
+	if (mbind(data, mmap_len, MPOL_BIND, node_mask, node + 1 + 1, 0)) {
+		pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
+		       data, data + mmap_len, node);
+		err = -1;
+	}
+
+	free(node_mask);
+	return err;
+}
+#else
+/*
+ * Back AIO buffer slot @idx of @map with perf_mmap__mmap_len(map) bytes from
+ * malloc(). Returns 0 on success, -1 if the allocation failed.
+ */
+static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
+{
+	void *buf = malloc(perf_mmap__mmap_len(map));
+
+	map->aio.data[idx] = buf;
+	return buf != NULL ? 0 : -1;
+}
+
+static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
+{
+ zfree(&(map->aio.data[idx]));
+}
+
+static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused,
+ int cpu __maybe_unused, int affinity __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Set up the AIO state of @map for mp->nr_cblocks control blocks: the array
+ * of aiocb pointers, the control blocks themselves and one data buffer per
+ * control block, each bound through perf_mmap__aio_bind(). Nothing is
+ * allocated when mp->nr_cblocks is 0.
+ *
+ * Returns 0 on success, -1 on failure. Allocations made before a failure
+ * are not released here.
+ */
+static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
+{
+	int delta_max, i, prio, ret;
+
+	map->aio.nr_cblocks = mp->nr_cblocks;
+	if (!map->aio.nr_cblocks)
+		return 0;
+
+	map->aio.aiocb = calloc(map->aio.nr_cblocks, sizeof(struct aiocb *));
+	if (!map->aio.aiocb) {
+		pr_debug2("failed to allocate aiocb for data buffer, error %m\n");
+		return -1;
+	}
+	map->aio.cblocks = calloc(map->aio.nr_cblocks, sizeof(struct aiocb));
+	if (!map->aio.cblocks) {
+		pr_debug2("failed to allocate cblocks for data buffer, error %m\n");
+		return -1;
+	}
+	map->aio.data = calloc(map->aio.nr_cblocks, sizeof(void *));
+	if (!map->aio.data) {
+		pr_debug2("failed to allocate data buffer, error %m\n");
+		return -1;
+	}
+
+	delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
+	for (i = 0; i < map->aio.nr_cblocks; ++i) {
+		ret = perf_mmap__aio_alloc(map, i);
+		if (ret == -1) {
+			pr_debug2("failed to allocate data buffer area, error %m\n");
+			return -1;
+		}
+		ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity);
+		if (ret == -1)
+			return -1;
+		/*
+		 * An aio_fildes value other than -1 denotes a started
+		 * aio write operation on the cblock, which requires an
+		 * explicit record__aio_sync() call before the cblock
+		 * may be reused. Start out with -1.
+		 */
+		map->aio.cblocks[i].aio_fildes = -1;
+		/*
+		 * Allocate cblocks with priority delta to have
+		 * faster aio write system calls because queued requests
+		 * are kept in separate per-prio queues and adding
+		 * a new request will iterate through a shorter per-prio
+		 * list. Blocks with numbers higher than
+		 * _SC_AIO_PRIO_DELTA_MAX go with priority 0.
+		 */
+		prio = delta_max - i;
+		map->aio.cblocks[i].aio_reqprio = prio >= 0 ? prio : 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Release the AIO state of @map: every data buffer, then the data, cblocks
+ * and aiocb arrays.
+ */
+static void perf_mmap__aio_munmap(struct perf_mmap *map)
+{
+	int i;
+
+	/*
+	 * perf_mmap__aio_mmap() sets nr_cblocks before it allocates anything,
+	 * so the data array may still be NULL here if it failed early; only
+	 * walk the slots when the array exists.
+	 */
+	if (map->aio.data) {
+		for (i = 0; i < map->aio.nr_cblocks; ++i)
+			perf_mmap__aio_free(map, i);
+		zfree(&map->aio.data);
+	}
+	zfree(&map->aio.cblocks);
+	zfree(&map->aio.aiocb);
+}
+
+int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
+ off_t *off)
+{
+ u64 head = perf_mmap__read_head(md);
+ unsigned char *data = md->base + page_size;
+ unsigned long size, size0 = 0;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(md);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 0 : -1;
+
+ /*
+ * md->base data is copied into the md->aio.data[idx] buffer to
+ * release space in the kernel buffer as fast as possible,
+ * through perf_mmap__consume() below.
+ *
+ * That lets the kernel proceed with storing more
+ * profiling data into the kernel buffer earlier than other
+ * per-cpu kernel buffers are handled.
+ *
+ * Copying can be done in two steps in case the chunk of
+ * profiling data crosses the upper bound of the kernel buffer.
+ * In this case we first copy the part of the data from md->start
+ * up to the upper bound and then the remainder from the
+ * beginning of the kernel buffer up to the end of
+ * the data chunk.
+ */
+
+ size = md->end - md->start;
+
+ if ((md->start & md->mask) + size != (md->end & md->mask)) {
+ buf = &data[md->start & md->mask];
+ size = md->mask + 1 - (md->start & md->mask);
+ md->start += size;
+ memcpy(md->aio.data[idx], buf, size);
+ size0 = size;
+ }
+
+ buf = &data[md->start & md->mask];
+ size = md->end - md->start;
+ md->start += size;
+ memcpy(md->aio.data[idx] + size0, buf, size);
+
+ /*
+ * Increment md->refcount to guard the md->aio.data[idx] buffer
+ * from premature deallocation, because the md object can be
+ * released before the aio write request started
+ * on md->aio.data[idx] is complete.
+ *
+ * perf_mmap__put() is done at record__aio_complete()
+ * after the started request completes.
+ */
+ perf_mmap__get(md);
+
+ md->prev = head;
+ perf_mmap__consume(md);
+
+ rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
+ if (!rc) {
+ *off += size0 + size;
+ } else {
+ /*
+ * Decrement md->refcount back if aio write
+ * operation failed to start.
+ */
+ perf_mmap__put(md);
+ }
+
+ return rc;
+}
+#else
+static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
+ struct mmap_params *mp __maybe_unused)
+{
+ return 0;
+}
+
+static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
+{
+}
+#endif
+
void perf_mmap__munmap(struct perf_mmap *map)
{
+ perf_mmap__aio_munmap(map);
if (map->base != NULL) {
munmap(map->base, perf_mmap__mmap_len(map));
map->base = NULL;
@@ -164,6 +383,32 @@ void perf_mmap__munmap(struct perf_mmap *map)
auxtrace_mmap__munmap(&map->auxtrace_mmap);
}
+static void build_node_mask(int node, cpu_set_t *mask)
+{
+ int c, cpu, nr_cpus;
+ const struct cpu_map *cpu_map = NULL;
+ /*
+ * Add to @mask every CPU from the cpu_map__online() map that
+ * cpu__get_node() places on @node. Bits are only set here, never
+ * cleared, so the caller decides what @mask starts out as.
+ */
+ cpu_map = cpu_map__online();
+ if (!cpu_map) /* no map to walk: leave @mask untouched */
+ return;
+
+ nr_cpus = cpu_map__nr(cpu_map);
+ for (c = 0; c < nr_cpus; c++) {
+ cpu = cpu_map->map[c]; /* map c index to online cpu index */
+ if (cpu__get_node(cpu) == node)
+ CPU_SET(cpu, mask);
+ }
+}
+
+static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp)
+{
+ CPU_ZERO(&map->affinity_mask); /* start empty; stays empty when neither branch below is taken */
+ if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
+ build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); /* online CPUs on map->cpu's node */
+ else if (mp->affinity == PERF_AFFINITY_CPU)
+ CPU_SET(map->cpu, &map->affinity_mask); /* only map->cpu itself */
+}
+
int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
{
/*
@@ -193,11 +438,13 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
map->fd = fd;
map->cpu = cpu;
+ perf_mmap__setup_affinity_mask(map, mp);
+
if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
&mp->auxtrace_mp, map->base, fd))
return -1;
- return 0;
+ return perf_mmap__aio_mmap(map, mp);
}
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cc5e2d6d17a9..e566c19b242b 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -6,9 +6,13 @@
#include <linux/types.h>
#include <linux/ring_buffer.h>
#include <stdbool.h>
+#ifdef HAVE_AIO_SUPPORT
+#include <aio.h>
+#endif
#include "auxtrace.h"
#include "event.h"
+struct aiocb;
/**
* struct perf_mmap - perf's ring buffer mmap details
*
@@ -26,6 +30,15 @@ struct perf_mmap {
bool overwrite;
struct auxtrace_mmap auxtrace_mmap;
char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+#ifdef HAVE_AIO_SUPPORT
+ struct {
+ void **data;
+ struct aiocb *cblocks;
+ struct aiocb **aiocb;
+ int nr_cblocks;
+ } aio;
+#endif
+ cpu_set_t affinity_mask;
};
/*
@@ -57,7 +70,7 @@ enum bkw_mmap_state {
};
struct mmap_params {
- int prot, mask;
+ int prot, mask, nr_cblocks, affinity;
struct auxtrace_mmap_params auxtrace_mp;
};
@@ -85,6 +98,18 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map);
int perf_mmap__push(struct perf_mmap *md, void *to,
int push(struct perf_mmap *map, void *to, void *buf, size_t size));
+#ifdef HAVE_AIO_SUPPORT
+int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
+ off_t *off);
+#else
+static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
+ off_t *off __maybe_unused)
+{
+ return 0; /* stub for builds without HAVE_AIO_SUPPORT: never calls push(), leaves *off alone */
+}
+#endif
size_t perf_mmap__mmap_len(struct perf_mmap *map);
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
index cf8bd123cf73..aed170bd4384 100644
--- a/tools/perf/util/namespaces.c
+++ b/tools/perf/util/namespaces.c
@@ -18,6 +18,7 @@
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <asm/bug.h>
struct namespaces *namespaces__new(struct namespaces_event *event)
{
@@ -186,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
char curpath[PATH_MAX];
int oldns = -1;
int newns = -1;
+ char *oldcwd = NULL;
if (nc == NULL)
return;
@@ -199,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
return;
+ oldcwd = get_current_dir_name();
+ if (!oldcwd)
+ return;
+
oldns = open(curpath, O_RDONLY);
if (oldns < 0)
- return;
+ goto errout;
newns = open(nsi->mntns_path, O_RDONLY);
if (newns < 0)
@@ -210,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi,
if (setns(newns, CLONE_NEWNS) < 0)
goto errout;
+ nc->oldcwd = oldcwd;
nc->oldns = oldns;
nc->newns = newns;
return;
errout:
+ free(oldcwd);
if (oldns > -1)
close(oldns);
if (newns > -1)
@@ -223,11 +231,16 @@ errout:
void nsinfo__mountns_exit(struct nscookie *nc)
{
- if (nc == NULL || nc->oldns == -1 || nc->newns == -1)
+ if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
return;
setns(nc->oldns, CLONE_NEWNS);
+ if (nc->oldcwd) {
+ WARN_ON_ONCE(chdir(nc->oldcwd));
+ zfree(&nc->oldcwd);
+ }
+
if (nc->oldns > -1) {
close(nc->oldns);
nc->oldns = -1;
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
index cae1a9a39722..d5f46c09ea31 100644
--- a/tools/perf/util/namespaces.h
+++ b/tools/perf/util/namespaces.h
@@ -38,6 +38,7 @@ struct nsinfo {
struct nscookie {
int oldns;
int newns;
+ char *oldcwd;
};
int nsinfo__init(struct nsinfo *nsi);
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 1904e7f6ec84..ea523d3b248f 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -219,13 +219,12 @@ int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
return 0;
}
-static int __ordered_events__flush(struct ordered_events *oe)
+static int do_flush(struct ordered_events *oe, bool show_progress)
{
struct list_head *head = &oe->events;
struct ordered_event *tmp, *iter;
u64 limit = oe->next_flush;
u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
- bool show_progress = limit == ULLONG_MAX;
struct ui_progress prog;
int ret;
@@ -263,7 +262,8 @@ static int __ordered_events__flush(struct ordered_events *oe)
return 0;
}
-int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
+static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
+ u64 timestamp)
{
static const char * const str[] = {
"NONE",
@@ -272,12 +272,16 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
"HALF ",
};
int err;
+ bool show_progress = false;
if (oe->nr_events == 0)
return 0;
switch (how) {
case OE_FLUSH__FINAL:
+ show_progress = true;
+ __fallthrough;
+ case OE_FLUSH__TOP:
oe->next_flush = ULLONG_MAX;
break;
@@ -298,6 +302,11 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
break;
}
+ case OE_FLUSH__TIME:
+ oe->next_flush = timestamp;
+ show_progress = false;
+ break;
+
case OE_FLUSH__ROUND:
case OE_FLUSH__NONE:
default:
@@ -308,7 +317,7 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
str[how], oe->nr_events);
pr_oe_time(oe->max_timestamp, "max_timestamp\n");
- err = __ordered_events__flush(oe);
+ err = do_flush(oe, show_progress);
if (!err) {
if (how == OE_FLUSH__ROUND)
@@ -324,7 +333,29 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
return err;
}
-void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver)
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
+{
+ return __ordered_events__flush(oe, how, 0); /* no timestamp supplied; the OE_FLUSH__TIME case would assign this 0 to oe->next_flush */
+}
+
+int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp)
+{
+ return __ordered_events__flush(oe, OE_FLUSH__TIME, timestamp); /* OE_FLUSH__TIME sets oe->next_flush to @timestamp */
+}
+
+u64 ordered_events__first_time(struct ordered_events *oe)
+{
+ struct ordered_event *event;
+ /*
+ * Return the timestamp of the entry at the head of oe->events,
+ * or 0 when that list is empty.
+ */
+ if (list_empty(&oe->events))
+ return 0;
+
+ event = list_first_entry(&oe->events, struct ordered_event, list);
+ return event->timestamp;
+}
+
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
+ void *data)
{
INIT_LIST_HEAD(&oe->events);
INIT_LIST_HEAD(&oe->cache);
@@ -332,6 +363,7 @@ void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t d
oe->max_alloc_size = (u64) -1;
oe->cur_alloc_size = 0;
oe->deliver = deliver;
+ oe->data = data;
}
static void
@@ -359,8 +391,10 @@ void ordered_events__free(struct ordered_events *oe)
* Current buffer might not have all the events allocated
* yet, we need to free only allocated ones ...
*/
- list_del(&oe->buffer->list);
- ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+ if (oe->buffer) {
+ list_del(&oe->buffer->list);
+ ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+ }
/* ... and continue with the rest */
list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
@@ -375,5 +409,5 @@ void ordered_events__reinit(struct ordered_events *oe)
ordered_events__free(oe);
memset(oe, '\0', sizeof(*oe));
- ordered_events__init(oe, old_deliver);
+ ordered_events__init(oe, old_deliver, oe->data);
}
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 1338d5c345dc..0920fb0ec6cc 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -18,6 +18,8 @@ enum oe_flush {
OE_FLUSH__FINAL,
OE_FLUSH__ROUND,
OE_FLUSH__HALF,
+ OE_FLUSH__TOP,
+ OE_FLUSH__TIME,
};
struct ordered_events;
@@ -47,15 +49,19 @@ struct ordered_events {
enum oe_flush last_flush_type;
u32 nr_unordered_events;
bool copy_on_queue;
+ void *data;
};
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
u64 timestamp, u64 file_offset);
void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
-void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
+int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp);
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
+ void *data);
void ordered_events__free(struct ordered_events *oe);
void ordered_events__reinit(struct ordered_events *oe);
+u64 ordered_events__first_time(struct ordered_events *oe);
static inline
void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f8cd3e7c9186..4dcc01b2532c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -926,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
[PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
[PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
[PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
+ [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr",
[PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
[PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
@@ -1037,6 +1038,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
CHECK_TYPE_VAL(NUM);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ CHECK_TYPE_VAL(NUM);
+ break;
default:
err->str = strdup("unknown term");
err->idx = term->err_term;
@@ -1084,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
case PARSE_EVENTS__TERM_TYPE_INHERIT:
case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
return config_term_common(attr, term, err);
@@ -1162,6 +1167,9 @@ do { \
case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
+ ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num);
+ break;
case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
break;
@@ -2454,7 +2462,7 @@ restart:
if (!name_only && strlen(syms->alias))
snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
else
- strncpy(name, syms->symbol, MAX_NAME_LEN);
+ strlcpy(name, syms->symbol, MAX_NAME_LEN);
evt_list[evt_i] = strdup(name);
if (evt_list[evt_i] == NULL)
@@ -2532,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
print_sdt_events(NULL, NULL, name_only);
- metricgroup__print(true, true, NULL, name_only);
+ metricgroup__print(true, true, NULL, name_only, details_flag);
}
int parse_events__is_hardcoded_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 4473dac27aee..5ed035cbcbb7 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -71,6 +71,7 @@ enum {
PARSE_EVENTS__TERM_TYPE_NOINHERIT,
PARSE_EVENTS__TERM_TYPE_INHERIT,
PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+ PARSE_EVENTS__TERM_TYPE_MAX_EVENTS,
PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
PARSE_EVENTS__TERM_TYPE_OVERWRITE,
PARSE_EVENTS__TERM_TYPE_DRV_CFG,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 5f761f3ed0f3..7805c71aaae2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -269,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); }
inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index da8fe57691b8..44819bdb037d 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -311,7 +311,7 @@ value_sym '/' event_config '/'
$$ = list;
}
|
-value_sym sep_slash_dc
+value_sym sep_slash_slash_dc
{
struct list_head *list;
int type = $1 >> 16;
@@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE
sep_dc: ':' |
-sep_slash_dc: '/' | ':' |
+sep_slash_slash_dc: '/' '/' | ':' |
%%
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7799788f662f..51d437f55d18 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -29,8 +29,6 @@ struct perf_pmu_format {
struct list_head list;
};
-#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
-
int perf_pmu_parse(struct list_head *list, char *name);
extern FILE *perf_pmu_in;
@@ -145,7 +143,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
int fd, ret = -1;
char path[PATH_MAX];
- snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
+ scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
fd = open(path, O_RDONLY);
if (fd == -1)
@@ -175,7 +173,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
ssize_t sret;
int fd;
- snprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
+ scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
fd = open(path, O_RDONLY);
if (fd == -1)
@@ -205,7 +203,7 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name)
char path[PATH_MAX];
int fd;
- snprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
+ scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
fd = open(path, O_RDONLY);
if (fd == -1)
@@ -223,7 +221,7 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
char path[PATH_MAX];
int fd;
- snprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
+ scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
fd = open(path, O_RDONLY);
if (fd == -1)
@@ -655,45 +653,6 @@ static int is_arm_pmu_core(const char *name)
return 0;
}
-/*
- * Return the CPU id as a raw string.
- *
- * Each architecture should provide a more precise id string that
- * can be use to match the architecture's "mapfile".
- */
-char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
-{
- return NULL;
-}
-
-/* Return zero when the cpuid from the mapfile.csv matches the
- * cpuid string generated on this platform.
- * Otherwise return non-zero.
- */
-int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
-{
- regex_t re;
- regmatch_t pmatch[1];
- int match;
-
- if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
- /* Warn unable to generate match particular string. */
- pr_info("Invalid regular expression %s\n", mapcpuid);
- return 1;
- }
-
- match = !regexec(&re, cpuid, 1, pmatch, 0);
- regfree(&re);
- if (match) {
- size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
-
- /* Verify the entire string matched. */
- if (match_len == strlen(cpuid))
- return 0;
- }
- return 1;
-}
-
static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
{
char *cpuid;
@@ -773,7 +732,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
if (!is_arm_pmu_core(name)) {
pname = pe->pmu ? pe->pmu : "cpu";
- if (strncmp(pname, name, strlen(pname)))
+ if (strcmp(pname, name))
continue;
}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 76fecec7b3f9..47253c3daf55 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -6,9 +6,10 @@
#include <linux/compiler.h>
#include <linux/perf_event.h>
#include <stdbool.h>
-#include "evsel.h"
#include "parse-events.h"
+struct perf_evsel_config_term;
+
enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
PERF_PMU_FORMAT_VALUE_CONFIG1,
@@ -16,6 +17,7 @@ enum {
};
#define PERF_PMU_FORMAT_BITS 64
+#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
struct perf_event_attr;
@@ -29,7 +31,6 @@ struct perf_pmu {
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
struct list_head list; /* ELEM */
- int (*set_drv_config) (struct perf_evsel_config_term *term);
};
struct perf_pmu_info {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index f119eb628dbb..0030f9b9bf7e 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -35,11 +35,14 @@
#include "util.h"
#include "event.h"
+#include "namespaces.h"
#include "strlist.h"
#include "strfilter.h"
#include "debug.h"
#include "cache.h"
#include "color.h"
+#include "map.h"
+#include "map_groups.h"
#include "symbol.h"
#include "thread.h"
#include <api/fs/fs.h>
@@ -692,7 +695,7 @@ static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
return ret;
for (i = 0; i < ntevs && ret >= 0; i++) {
- /* point.address is the addres of point.symbol + point.offset */
+ /* point.address is the address of point.symbol + point.offset */
tevs[i].point.address -= stext;
tevs[i].point.module = strdup(exec);
if (!tevs[i].point.module) {
@@ -1819,6 +1822,12 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
tp->offset = strtoul(fmt2_str, NULL, 10);
}
+ if (tev->uprobes) {
+ fmt2_str = strchr(p, '(');
+ if (fmt2_str)
+ tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0);
+ }
+
tev->nargs = argc - 2;
tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
if (tev->args == NULL) {
@@ -2012,6 +2021,22 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
return err;
}
+static int
+synthesize_uprobe_trace_def(struct probe_trace_event *tev, struct strbuf *buf)
+{
+ struct probe_trace_point *tp = &tev->point;
+ int err;
+ /*
+ * Append "<module>:0x<address>" to @buf, followed by
+ * "(0x<ref_ctr_offset>)" when tp->ref_ctr_offset is non-zero.
+ * Returns 0 on success, -1 when strbuf_addf() returns a negative
+ * value or when a reference counter offset is set but
+ * uprobe_ref_ctr_is_supported() reports no support.
+ */
+ err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address);
+
+ if (err >= 0 && tp->ref_ctr_offset) {
+ if (!uprobe_ref_ctr_is_supported())
+ return -1;
+ err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset);
+ }
+ return err >= 0 ? 0 : -1;
+}
+
char *synthesize_probe_trace_command(struct probe_trace_event *tev)
{
struct probe_trace_point *tp = &tev->point;
@@ -2041,15 +2066,17 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev)
}
/* Use the tp->address for uprobes */
- if (tev->uprobes)
- err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
- else if (!strncmp(tp->symbol, "0x", 2))
+ if (tev->uprobes) {
+ err = synthesize_uprobe_trace_def(tev, &buf);
+ } else if (!strncmp(tp->symbol, "0x", 2)) {
/* Absolute address. See try_to_find_absolute_address() */
err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
tp->module ? ":" : "", tp->address);
- else
+ } else {
err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
tp->module ? ":" : "", tp->symbol, tp->offset);
+ }
+
if (err)
goto error;
@@ -2633,6 +2660,13 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev)
{
int i;
char *buf = synthesize_probe_trace_command(tev);
+ struct probe_trace_point *tp = &tev->point;
+
+ if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) {
+ pr_warning("A semaphore is associated with %s:%s and "
+ "seems your kernel doesn't support it.\n",
+ tev->group, tev->event);
+ }
/* Old uprobe event doesn't support memory dereference */
if (!tev->uprobes || tev->nargs == 0 || !buf)
@@ -3031,7 +3065,7 @@ static int try_to_find_absolute_address(struct perf_probe_event *pev,
/*
* Give it a '0x' leading symbol name.
* In __add_probe_trace_events, a NULL symbol is interpreted as
- * invalud.
+ * invalid.
*/
if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0)
goto errout;
@@ -3497,7 +3531,8 @@ int show_available_funcs(const char *target, struct nsinfo *nsi,
/* Show all (filtered) symbols */
setup_pager();
- for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&map->dso->symbol_names); nd;
+ nd = rb_next(nd)) {
struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
if (strfilter__compare(_filter, pos->sym.name))
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 45b14f020558..05c8d571a901 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -4,8 +4,9 @@
#include <linux/compiler.h>
#include <stdbool.h>
-#include "intlist.h"
-#include "namespaces.h"
+
+struct intlist;
+struct nsinfo;
/* Probe related configurations */
struct probe_conf {
@@ -27,6 +28,7 @@ struct probe_trace_point {
char *symbol; /* Base symbol */
char *module; /* Module name */
unsigned long offset; /* Offset from symbol */
+ unsigned long ref_ctr_offset; /* SDT reference counter offset */
unsigned long address; /* Actual address of the trace point */
bool retprobe; /* Return probe flag */
};
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index b76088fadf3d..4062bc4412a9 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -20,6 +20,7 @@
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
+#include "namespaces.h"
#include "util.h"
#include "event.h"
#include "strlist.h"
@@ -424,7 +425,7 @@ static int probe_cache__open(struct probe_cache *pcache, const char *target,
if (target && build_id_cache__cached(target)) {
/* This is a cached buildid */
- strncpy(sbuildid, target, SBUILD_ID_SIZE);
+ strlcpy(sbuildid, target, SBUILD_ID_SIZE);
dir_name = build_id_cache__linkname(sbuildid, NULL, 0);
goto found;
}
@@ -696,8 +697,16 @@ out_err:
#ifdef HAVE_GELF_GETNOTE_SUPPORT
static unsigned long long sdt_note__get_addr(struct sdt_note *note)
{
- return note->bit32 ? (unsigned long long)note->addr.a32[0]
- : (unsigned long long)note->addr.a64[0];
+ return note->bit32 ?
+ (unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] :
+ (unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC];
+}
+
+static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note)
+{
+ return note->bit32 ? /* SDT_NOTE_IDX_REFCTR slot: from addr.a32[] when bit32 is set, else addr.a64[] */
+ (unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] :
+ (unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR];
+}
static const char * const type_to_suffix[] = {
@@ -775,14 +784,21 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note,
{
struct strbuf buf;
char *ret = NULL, **args;
- int i, args_count;
+ int i, args_count, err;
+ unsigned long long ref_ctr_offset;
if (strbuf_init(&buf, 32) < 0)
return NULL;
- if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
- sdtgrp, note->name, pathname,
- sdt_note__get_addr(note)) < 0)
+ err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+ sdtgrp, note->name, pathname,
+ sdt_note__get_addr(note));
+
+ ref_ctr_offset = sdt_note__get_ref_ctr_offset(note);
+ if (ref_ctr_offset && err >= 0)
+ err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset);
+
+ if (err < 0)
goto error;
if (!note->args)
@@ -998,6 +1014,7 @@ int probe_cache__show_all_caches(struct strfilter *filter)
enum ftrace_readme {
FTRACE_README_PROBE_TYPE_X = 0,
FTRACE_README_KRETPROBE_OFFSET,
+ FTRACE_README_UPROBE_REF_CTR,
FTRACE_README_END,
};
@@ -1009,6 +1026,7 @@ static struct {
[idx] = {.pattern = pat, .avail = false}
DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+ DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
};
static bool scan_ftrace_readme(enum ftrace_readme type)
@@ -1064,3 +1082,8 @@ bool kretprobe_offset_is_supported(void)
{
return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
}
+
+bool uprobe_ref_ctr_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR); /* table pattern for this entry: "*ref_ctr_offset*" */
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
index 63f29b1d22c1..2a249182f2a6 100644
--- a/tools/perf/util/probe-file.h
+++ b/tools/perf/util/probe-file.h
@@ -69,6 +69,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
int probe_cache__show_all_caches(struct strfilter *filter);
bool probe_type_is_available(enum probe_type type);
bool kretprobe_offset_is_supported(void);
+bool uprobe_ref_ctr_is_supported(void);
#else /* ! HAVE_LIBELF_SUPPORT */
static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
{
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 50150dfc0cdf..dda0ac978b1e 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -386,7 +386,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
struct tep_format_field *field;
if (!evsel->tp_format) {
- struct tep_event_format *tp_format;
+ struct tep_event *tp_format;
tp_format = trace_event__tp_format_id(evsel->attr.config);
if (!tp_format)
@@ -939,7 +939,8 @@ static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
file = PyFile_FromFile(fp, "perf", "r", NULL);
#else
- file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1, NULL, NULL, NULL, 1);
+ file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+ NULL, NULL, NULL, 0);
#endif
if (file == NULL)
goto free_list;
@@ -1240,7 +1241,7 @@ static struct {
static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
PyObject *args, PyObject *kwargs)
{
- struct tep_event_format *tp_format;
+ struct tep_event *tp_format;
static char *kwlist[] = { "sys", "name", NULL };
char *sys = NULL;
char *name = NULL;
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index a920f702a74d..376e86cb4c3c 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -140,12 +140,12 @@ struct __name##_sorted *__name = __name##_sorted__new
/* For 'struct intlist' */
#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
- DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \
+ DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
__ilist->rblist.nr_entries)
/* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
- DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \
- __machine->threads[hash_bucket].nr)
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
+ __machine->threads[hash_bucket].nr)
#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
index 0efc3258c648..11e07fab20dc 100644
--- a/tools/perf/util/rblist.c
+++ b/tools/perf/util/rblist.c
@@ -13,8 +13,9 @@
int rblist__add_node(struct rblist *rblist, const void *new_entry)
{
- struct rb_node **p = &rblist->entries.rb_node;
+ struct rb_node **p = &rblist->entries.rb_root.rb_node;
struct rb_node *parent = NULL, *new_node;
+ bool leftmost = true;
while (*p != NULL) {
int rc;
@@ -24,8 +25,10 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
rc = rblist->node_cmp(parent, new_entry);
if (rc > 0)
p = &(*p)->rb_left;
- else if (rc < 0)
+ else if (rc < 0) {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
else
return -EEXIST;
}
@@ -35,7 +38,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
return -ENOMEM;
rb_link_node(new_node, parent, p);
- rb_insert_color(new_node, &rblist->entries);
+ rb_insert_color_cached(new_node, &rblist->entries, leftmost);
++rblist->nr_entries;
return 0;
@@ -43,7 +46,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
{
- rb_erase(rb_node, &rblist->entries);
+ rb_erase_cached(rb_node, &rblist->entries);
--rblist->nr_entries;
rblist->node_delete(rblist, rb_node);
}
@@ -52,8 +55,9 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist,
const void *entry,
bool create)
{
- struct rb_node **p = &rblist->entries.rb_node;
+ struct rb_node **p = &rblist->entries.rb_root.rb_node;
struct rb_node *parent = NULL, *new_node = NULL;
+ bool leftmost = true;
while (*p != NULL) {
int rc;
@@ -63,8 +67,10 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist,
rc = rblist->node_cmp(parent, entry);
if (rc > 0)
p = &(*p)->rb_left;
- else if (rc < 0)
+ else if (rc < 0) {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
else
return parent;
}
@@ -73,7 +79,8 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist,
new_node = rblist->node_new(rblist, entry);
if (new_node) {
rb_link_node(new_node, parent, p);
- rb_insert_color(new_node, &rblist->entries);
+ rb_insert_color_cached(new_node,
+ &rblist->entries, leftmost);
++rblist->nr_entries;
}
}
@@ -94,7 +101,7 @@ struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
void rblist__init(struct rblist *rblist)
{
if (rblist != NULL) {
- rblist->entries = RB_ROOT;
+ rblist->entries = RB_ROOT_CACHED;
rblist->nr_entries = 0;
}
@@ -103,7 +110,7 @@ void rblist__init(struct rblist *rblist)
void rblist__exit(struct rblist *rblist)
{
- struct rb_node *pos, *next = rb_first(&rblist->entries);
+ struct rb_node *pos, *next = rb_first_cached(&rblist->entries);
while (next) {
pos = next;
@@ -124,7 +131,8 @@ struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx)
{
struct rb_node *node;
- for (node = rb_first(&rblist->entries); node; node = rb_next(node)) {
+ for (node = rb_first_cached(&rblist->entries); node;
+ node = rb_next(node)) {
if (!idx--)
return node;
}
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
index 76df15c27f5f..14b232a4d0b6 100644
--- a/tools/perf/util/rblist.h
+++ b/tools/perf/util/rblist.h
@@ -20,7 +20,7 @@
*/
struct rblist {
- struct rb_root entries;
+ struct rb_root_cached entries;
unsigned int nr_entries;
int (*node_cmp)(struct rb_node *rbn, const void *entry);
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
new file mode 100644
index 000000000000..d4356030b504
--- /dev/null
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for s390 CPU measurement counter set diagnostic facility
+ *
+ * Copyright IBM Corp. 2019
+ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ * Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMCF_KERNEL_H
+#define S390_CPUMCF_KERNEL_H
+
+#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
+#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
+
+struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int set:16; /* 16-31 Counter set identifier */
+ unsigned int ctr:16; /* 32-47 Number of stored counters */
+ unsigned int res1:16; /* 48-63 Reserved */
+};
+
+struct cf_trailer_entry { /* CPU-M CF trailer for raw traces (64 byte) */
+ /* Bytes 0 - 7: indicator bits, also accessible as one word (flags) */
+ union {
+ struct {
+ unsigned int clock_base:1; /* TOD clock base */
+ unsigned int speed:1; /* CPU speed */
+ /* Measurement alerts */
+ unsigned int mtda:1; /* Loss of MT ctr. data alert */
+ unsigned int caca:1; /* Counter auth. change alert */
+ unsigned int lcda:1; /* Loss of counter data alert */
+ };
+ unsigned long flags; /* 0-63 All indicators; assumes 64-bit long - TODO confirm */
+ };
+ /* Bytes 8 - 15 */
+ unsigned int cfvn:16; /* 64-79 Ctr First Version */
+ unsigned int csvn:16; /* 80-95 Ctr Second Version */
+ unsigned int cpu_speed:32; /* 96-127 CPU speed */
+ /* Bytes 16 - 23 */
+ unsigned long timestamp; /* 128-191 Timestamp (TOD) */
+ /* Bytes 24 - 55: four words, addressable by name or as progusage[] */
+ union {
+ struct {
+ unsigned long progusage1;
+ unsigned long progusage2;
+ unsigned long progusage3;
+ unsigned long tod_base; /* same storage as progusage[3] */
+ };
+ unsigned long progusage[4];
+ };
+ /* Bytes 56 - 63 */
+ unsigned int mach_type:16; /* 448-463 Machine type */
+ unsigned int res1:16; /* 464-479 Reserved */
+ unsigned int res2:32; /* 480-511 Reserved */
+};
+
+#define CPUMF_CTR_SET_BASIC 0 /* Basic Counter Set */
+#define CPUMF_CTR_SET_USER 1 /* Problem-State Counter Set */
+#define CPUMF_CTR_SET_CRYPTO 2 /* Crypto-Activity Counter Set */
+#define CPUMF_CTR_SET_EXT 3 /* Extended Counter Set */
+#define CPUMF_CTR_SET_MT_DIAG 4 /* MT-diagnostic Counter Set */
+#endif
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index a2eeebbfb25f..c215704931dc 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -162,6 +162,7 @@
#include "auxtrace.h"
#include "s390-cpumsf.h"
#include "s390-cpumsf-kernel.h"
+#include "s390-cpumcf-kernel.h"
#include "config.h"
struct s390_cpumsf {
@@ -184,8 +185,58 @@ struct s390_cpumsf_queue {
struct auxtrace_buffer *buffer;
int cpu;
FILE *logfile;
+ FILE *logfile_ctr;
};
+/* Check if the raw data should be dumped to file. If this is the case and
+ * the file to dump to has not been opened for writing, do so.
+ *
+ * The file is named aux.ctr.## where ## is the CPU number of the sample in
+ * hex. It is created in sf->logdir when that is set, otherwise in the current
+ * working directory. Nothing is done when log files are not requested, when
+ * the sample's CPU has no queue slot or when the queue is not allocated yet.
+ *
+ * Return 0 on success (or when there is nothing to do) and 1 on error. The
+ * error value is greater than zero, not negative, so processing continues.
+ */
+static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
+			       struct perf_sample *sample)
+{
+	struct s390_cpumsf_queue *sfq;
+	struct auxtrace_queue *q;
+
+	if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
+		return 0;
+
+	q = &sf->queues.queue_array[sample->cpu];
+	sfq = q->priv;
+	if (!sfq)		/* Queue not yet allocated */
+		return 0;
+
+	if (!sfq->logfile_ctr) {
+		char *name = NULL;
+		int len;
+
+		len = (sf->logdir)
+			? asprintf(&name, "%s/aux.ctr.%02x",
+				   sf->logdir, sample->cpu)
+			: asprintf(&name, "aux.ctr.%02x", sample->cpu);
+		if (len < 0) {
+			/* name is undefined after a failed asprintf(): do
+			 * not print or free it.
+			 */
+			pr_err("Failed to allocate counter set log file name, "
+			       "continue...\n");
+			return 1;
+		}
+
+		sfq->logfile_ctr = fopen(name, "w");
+		if (sfq->logfile_ctr == NULL) {
+			pr_err("Failed to open counter set log file %s, "
+			       "continue...\n", name);
+			free(name);
+			return 1;
+		}
+		free(name);
+	}
+
+	/* The last 4 bytes of the raw data are not written (presumably the
+	 * padding perf appends to the raw data area - TODO confirm). Refuse
+	 * shorter data instead of letting raw_size - 4 wrap around.
+	 */
+	if (sample->raw_size <= 4 ||
+	    fwrite(sample->raw_data, sample->raw_size - 4, 1,
+		   sfq->logfile_ctr) != 1) {
+		pr_err("Failed to write counter set data\n");
+		return 1;
+	}
+	return 0;
+}
+
/* Display s390 CPU measurement facility basic-sampling data entry */
static bool s390_cpumsf_basic_show(const char *color, size_t pos,
struct hws_basic_entry *basic)
@@ -301,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type,
*dsdes = 85;
*bsdes = 32;
break;
+ case 2964:
+ case 2965:
+ *dsdes = 112;
+ *bsdes = 32;
+ break;
default:
/* Illegal trailer entry */
return false;
@@ -506,7 +562,7 @@ static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
aux_ts = get_trailer_time(buf);
if (!aux_ts) {
pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
- sfq->buffer->data_offset);
+ (s64)sfq->buffer->data_offset);
aux_ts = ~0ULL;
goto out;
}
@@ -768,7 +824,7 @@ static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
}
static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
- pid_t pid, pid_t tid, u64 ip)
+ pid_t pid, pid_t tid, u64 ip, u64 timestamp)
{
char msg[MAX_AUXTRACE_ERROR_MSG];
union perf_event event;
@@ -776,7 +832,7 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
- code, cpu, pid, tid, ip, msg);
+ code, cpu, pid, tid, ip, msg, timestamp);
err = perf_session__deliver_synth_event(sf->session, &event, NULL);
if (err)
@@ -788,11 +844,12 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
{
return s390_cpumsf_synth_error(sf, 1, sample->cpu,
- sample->pid, sample->tid, 0);
+ sample->pid, sample->tid, 0,
+ sample->time);
}
static int
-s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
+s390_cpumsf_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
struct perf_tool *tool)
@@ -801,6 +858,8 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
struct s390_cpumsf,
auxtrace);
u64 timestamp = sample->time;
+ struct perf_evsel *ev_bc000;
+
int err = 0;
if (dump_trace)
@@ -811,6 +870,16 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
return -EINVAL;
}
+ if (event->header.type == PERF_RECORD_SAMPLE &&
+ sample->raw_size) {
+ /* Handle event with raw data */
+ ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
+ if (ev_bc000 &&
+ ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG)
+ err = s390_cpumcf_dumpctr(sf, sample);
+ return err;
+ }
+
if (event->header.type == PERF_RECORD_AUX &&
event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
return s390_cpumsf_lost(sf, sample);
@@ -891,9 +960,15 @@ static void s390_cpumsf_free_queues(struct perf_session *session)
struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
queues->queue_array[i].priv;
- if (sfq != NULL && sfq->logfile) {
- fclose(sfq->logfile);
- sfq->logfile = NULL;
+ if (sfq != NULL) {
+ if (sfq->logfile) {
+ fclose(sfq->logfile);
+ sfq->logfile = NULL;
+ }
+ if (sfq->logfile_ctr) {
+ fclose(sfq->logfile_ctr);
+ sfq->logfile_ctr = NULL;
+ }
}
zfree(&queues->queue_array[i].priv);
}
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
new file mode 100644
index 000000000000..6650f599ed9c
--- /dev/null
+++ b/tools/perf/util/s390-sample-raw.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2019
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Architecture specific trace_event function. Save event's bc000 raw data
+ * to file. File name is aux.ctr.## where ## stands for the CPU number the
+ * sample was taken from.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <sys/stat.h>
+#include <linux/compiler.h>
+#include <asm/byteorder.h>
+
+#include "debug.h"
+#include "util.h"
+#include "auxtrace.h"
+#include "session.h"
+#include "evlist.h"
+#include "config.h"
+#include "color.h"
+#include "sample-raw.h"
+#include "s390-cpumcf-kernel.h"
+#include "pmu-events/pmu-events.h"
+
+/* Size in bytes of a counter set: its header followed by ctr 64-bit counters */
+static size_t ctrset_size(struct cf_ctrset_entry *set)
+{
+	size_t nr_counters = set->ctr;
+
+	return sizeof(*set) + nr_counters * sizeof(u64);
+}
+
+/* A counter set header is valid if it carries the diagnostic entry ID */
+static bool ctrset_valid(struct cf_ctrset_entry *set)
+{
+	if (set->def != S390_CPUMCF_DIAG_DEF)
+		return false;
+	return true;
+}
+
+/* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte
+ * aligned and might have trailing padding bytes.
+ * Check the raw data of a sample: walk the counter sets from the start of
+ * the data. The walk has to end either exactly at the end of the data or at
+ * an entry that is a trailer followed by 4 padding bytes. Nothing but an
+ * error message is displayed.
+ *
+ * Return true if the raw data is well formed, false if it is empty or
+ * contains an invalid counter set entry.
+ */
+static bool s390_cpumcfdg_testctr(struct perf_sample *sample)
+{
+	size_t len = sample->raw_size, offset = 0;
+	unsigned char *buf = sample->raw_data;
+	struct cf_trailer_entry *te;
+	struct cf_ctrset_entry *cep, ce;
+
+	if (!len)
+		return false;
+	while (offset < len) {
+		/* Never read a counter set header beyond the raw data end */
+		if (len - offset < sizeof(*cep)) {
+			pr_err("Invalid counter set entry at %zu\n", offset);
+			return false;
+		}
+
+		cep = (struct cf_ctrset_entry *)(buf + offset);
+		ce.def = be16_to_cpu(cep->def);
+		ce.set = be16_to_cpu(cep->set);
+		ce.ctr = be16_to_cpu(cep->ctr);
+		ce.res1 = be16_to_cpu(cep->res1);
+
+		if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) {
+			/* Raw data for counter sets are always multiple of 8
+			 * bytes. Prepending a 4 bytes size field to the
+			 * raw data block in the sample causes the perf tool
+			 * to append 4 padding bytes to make the raw data part
+			 * of the sample a multiple of eight bytes again.
+			 *
+			 * If the last entry (trailer) is 4 bytes off the raw
+			 * area data end, all is good.
+			 */
+			if (len - offset - sizeof(*te) == 4)
+				break;
+			pr_err("Invalid counter set entry at %zu\n", offset);
+			return false;
+		}
+		offset += ctrset_size(&ce);
+	}
+	return true;
+}
+
+/* Dump the trailer entry of event bc000 on screen, already tested on
+ * correctness.
+ *
+ * @color:  colour used for the output
+ * @offset: offset of the trailer within the raw data, printed as a prefix
+ * @tep:    trailer entry as found in the raw data (big endian)
+ *
+ * All fields are converted to host byte order before they are printed.
+ */
+static void s390_cpumcfdg_dumptrail(const char *color, size_t offset,
+				    struct cf_trailer_entry *tep)
+{
+	struct cf_trailer_entry te;
+
+	te.flags = be64_to_cpu(tep->flags);
+	te.cfvn = be16_to_cpu(tep->cfvn);
+	te.csvn = be16_to_cpu(tep->csvn);
+	te.cpu_speed = be32_to_cpu(tep->cpu_speed);
+	te.timestamp = be64_to_cpu(tep->timestamp);
+	te.progusage1 = be64_to_cpu(tep->progusage1);
+	te.progusage2 = be64_to_cpu(tep->progusage2);
+	te.progusage3 = be64_to_cpu(tep->progusage3);
+	te.tod_base = be64_to_cpu(tep->tod_base);
+	te.mach_type = be16_to_cpu(tep->mach_type);
+	te.res1 = be16_to_cpu(tep->res1);
+	te.res2 = be32_to_cpu(tep->res2);
+
+	/* timestamp and tod_base are unsigned long but printed with %llx:
+	 * cast them so the argument type matches the conversion.
+	 */
+	color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c"
+		      " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n",
+		      offset, te.clock_base ? 'T' : ' ',
+		      te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ',
+		      te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ',
+		      te.cfvn, te.csvn, te.cpu_speed,
+		      (unsigned long long)te.timestamp);
+	color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx"
+		      " Type:%x\n\n",
+		      te.progusage1, te.progusage2, te.progusage3,
+		      (unsigned long long)te.tod_base, te.mach_type);
+}
+
+/* Return starting number of a counter set, -1 if the set is not known */
+static int get_counterset_start(int setnr)
+{
+	static const int first_counter[] = {
+		[CPUMF_CTR_SET_BASIC]	= 0,	/* Basic counter set */
+		[CPUMF_CTR_SET_USER]	= 32,	/* Problem state counter set */
+		[CPUMF_CTR_SET_CRYPTO]	= 64,	/* Crypto counter set */
+		[CPUMF_CTR_SET_EXT]	= 128,	/* Extended counter set */
+		[CPUMF_CTR_SET_MT_DIAG]	= 448,	/* Diagnostic counter set */
+	};
+	const int nr_sets = sizeof(first_counter) / sizeof(first_counter[0]);
+
+	if (setnr < 0 || setnr >= nr_sets)
+		return -1;
+	return first_counter[setnr];
+}
+
+/* Scan the PMU table and extract the logical name of a counter from the
+ * PMU events table. Input is the counter set and counter number within the
+ * set. Construct the event number and use this as key. If they match return
+ * the name of this counter.
+ * If no match is found, no table is given or the counter set is unknown, a
+ * NULL pointer is returned.
+ */
+static const char *get_counter_name(int set, int nr, struct pmu_events_map *map)
+{
+	int start = get_counterset_start(set);
+	unsigned int event_nr, wanted;
+	struct pmu_event *evp;
+
+	/* An unknown set has no start number; adding nr to -1 could
+	 * accidentally select a counter of another set.
+	 */
+	if (!map || start < 0)
+		return NULL;
+
+	wanted = start + nr;
+	/* The table ends with an entry that has no name, event and desc */
+	for (evp = map->table; evp->name || evp->event || evp->desc; ++evp) {
+		if (evp->name == NULL || evp->event == NULL)
+			continue;
+		/* %x stores an unsigned int */
+		if (sscanf(evp->event, "event=%x", &event_nr) == 1 &&
+		    event_nr == wanted)
+			return evp->name;
+	}
+	return NULL;
+}
+
+/* Display the raw data of a sample on screen: each counter set with one line
+ * per counter (number, name from the PMU events table if found, value), then
+ * the trailer entry. The first entry that is not a valid counter set is
+ * taken as the trailer and ends the output.
+ * No bounds are checked here, the data has to be tested with
+ * s390_cpumcfdg_testctr() before.
+ */
+static void s390_cpumcfdg_dump(struct perf_sample *sample)
+{
+	size_t i, len = sample->raw_size, offset = 0;
+	unsigned char *buf = sample->raw_data;
+	const char *color = PERF_COLOR_BLUE;
+	struct cf_ctrset_entry *cep, ce;
+	struct pmu_events_map *map;
+	struct perf_pmu pmu;
+	u64 *p;
+
+	/* Look up the events table using an all-zero PMU description */
+	memset(&pmu, 0, sizeof(pmu));
+	map = perf_pmu__find_map(&pmu);
+	while (offset < len) {
+		cep = (struct cf_ctrset_entry *)(buf + offset);
+
+		ce.def = be16_to_cpu(cep->def);
+		ce.set = be16_to_cpu(cep->set);
+		ce.ctr = be16_to_cpu(cep->ctr);
+		ce.res1 = be16_to_cpu(cep->res1);
+
+		if (!ctrset_valid(&ce)) {	/* Print trailer */
+			s390_cpumcfdg_dumptrail(color, offset,
+						(struct cf_trailer_entry *)cep);
+			return;
+		}
+
+		color_fprintf(stdout, color, " [%#08zx] Counterset:%d"
+			      " Counters:%d\n", offset, ce.set, ce.ctr);
+		/* The counters follow the counter set header */
+		for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) {
+			const char *ev_name = get_counter_name(ce.set, i, map);
+
+			/* i is a size_t and the value is 64 bit wide: use
+			 * conversions that match these types.
+			 */
+			color_fprintf(stdout, color,
+				      "\tCounter:%03zu %s Value:%#018" PRIx64 "\n",
+				      i, ev_name ?: "<unknown>",
+				      (uint64_t)be64_to_cpu(*p));
+		}
+		offset += ctrset_size(&ce);
+	}
+}
+
+/* S390 specific trace event function. Only PERF_RECORD_SAMPLE events that
+ * belong to the counter set diagnostic event (bc000) are handled: their raw
+ * data is checked and, if well formed, displayed on screen.
+ * The function is only invoked when the dump flag -D is set.
+ */
+void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, union perf_event *event,
+				  struct perf_sample *sample)
+{
+	struct perf_evsel *evsel;
+
+	if (event->header.type != PERF_RECORD_SAMPLE)
+		return;
+
+	evsel = perf_evlist__event2evsel(evlist, event);
+	if (!evsel || evsel->attr.config != PERF_EVENT_CPUM_CF_DIAG)
+		return;
+
+	/* Display raw data on screen, but only if it is well formed */
+	if (s390_cpumcfdg_testctr(sample))
+		s390_cpumcfdg_dump(sample);
+	else
+		pr_err("Invalid counter set data encountered\n");
+}
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
new file mode 100644
index 000000000000..c21e1311fb0f
--- /dev/null
+++ b/tools/perf/util/sample-raw.c
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <string.h>
+#include "evlist.h"
+#include "env.h"
+#include "sample-raw.h"
+
+/*
+ * Look at the architecture the perf data file was created on and install
+ * the matching raw sample handler in the evlist. Only s390 has one; for
+ * any other (or an unknown) architecture the evlist is left untouched.
+ */
+void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist)
+{
+	const char *arch_pf = perf_env__arch(evlist->env);
+
+	if (arch_pf == NULL || strcmp("s390", arch_pf) != 0)
+		return;
+
+	evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw;
+}
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
new file mode 100644
index 000000000000..95d445c87e93
--- /dev/null
+++ b/tools/perf/util/sample-raw.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SAMPLE_RAW_H
+#define __SAMPLE_RAW_H 1
+
+struct perf_evlist;
+union perf_event;
+struct perf_sample;
+
+void perf_evlist__s390_sample_raw(struct perf_evlist *evlist,
+ union perf_event *event,
+ struct perf_sample *sample);
+
+void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist);
+#endif /* __SAMPLE_RAW_H */
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
index 82d28c67e0f3..7b342ce38d99 100644
--- a/tools/perf/util/scripting-engines/Build
+++ b/tools/perf/util/scripting-engines/Build
@@ -1,5 +1,5 @@
-libperf-$(CONFIG_LIBPERL) += trace-event-perl.o
-libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+perf-$(CONFIG_LIBPERL) += trace-event-perl.o
+perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 89cb887648f9..5f06378a482b 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -37,6 +37,8 @@
#include "../../perf.h"
#include "../callchain.h"
#include "../machine.h"
+#include "../map.h"
+#include "../symbol.h"
#include "../thread.h"
#include "../event.h"
#include "../trace-event.h"
@@ -189,7 +191,7 @@ static void define_flag_field(const char *ev_name,
LEAVE;
}
-static void define_event_symbols(struct tep_event_format *event,
+static void define_event_symbols(struct tep_event *event,
const char *ev_name,
struct tep_print_arg *args)
{
@@ -338,7 +340,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
struct addr_location *al)
{
struct thread *thread = al->thread;
- struct tep_event_format *event = evsel->tp_format;
+ struct tep_event *event = evsel->tp_format;
struct tep_format_field *field;
static char handler[256];
unsigned long long val;
@@ -537,7 +539,7 @@ static int perl_stop_script(void)
static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
{
- struct tep_event_format *event = NULL;
+ struct tep_event *event = NULL;
struct tep_format_field *f;
char fname[PATH_MAX];
int not_first, count;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 69aa93d4ee99..0e17db41b49b 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -44,6 +44,8 @@
#include "../thread-stack.h"
#include "../trace-event.h"
#include "../call-path.h"
+#include "map.h"
+#include "symbol.h"
#include "thread_map.h"
#include "cpumap.h"
#include "print_binary.h"
@@ -264,7 +266,7 @@ static void define_field(enum tep_print_arg_type field_type,
Py_DECREF(t);
}
-static void define_event_symbols(struct tep_event_format *event,
+static void define_event_symbols(struct tep_event *event,
const char *ev_name,
struct tep_print_arg *args)
{
@@ -332,7 +334,7 @@ static void define_event_symbols(struct tep_event_format *event,
define_event_symbols(event, ev_name, args->next);
}
-static PyObject *get_field_numeric_entry(struct tep_event_format *event,
+static PyObject *get_field_numeric_entry(struct tep_event *event,
struct tep_format_field *field, void *data)
{
bool is_array = field->flags & TEP_FIELD_IS_ARRAY;
@@ -494,14 +496,14 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
pydict_set_item_string_decref(pyelem, "cycles",
PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
- thread__find_map(thread, sample->cpumode,
- br->entries[i].from, &al);
+ thread__find_map_fb(thread, sample->cpumode,
+ br->entries[i].from, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "from_dsoname",
_PyUnicode_FromString(dsoname));
- thread__find_map(thread, sample->cpumode,
- br->entries[i].to, &al);
+ thread__find_map_fb(thread, sample->cpumode,
+ br->entries[i].to, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "to_dsoname",
_PyUnicode_FromString(dsoname));
@@ -576,14 +578,14 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
if (!pyelem)
Py_FatalError("couldn't create Python dictionary");
- thread__find_symbol(thread, sample->cpumode,
- br->entries[i].from, &al);
+ thread__find_symbol_fb(thread, sample->cpumode,
+ br->entries[i].from, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "from",
_PyUnicode_FromString(bf));
- thread__find_symbol(thread, sample->cpumode,
- br->entries[i].to, &al);
+ thread__find_symbol_fb(thread, sample->cpumode,
+ br->entries[i].to, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "to",
_PyUnicode_FromString(bf));
@@ -733,8 +735,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
- pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize(
- (const char *)&evsel->attr, sizeof(evsel->attr)));
+ pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->attr, sizeof(evsel->attr)));
pydict_set_item_string_decref(dict_sample, "pid",
_PyLong_FromLong(sample->pid));
@@ -790,7 +791,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
struct perf_evsel *evsel,
struct addr_location *al)
{
- struct tep_event_format *event = evsel->tp_format;
+ struct tep_event *event = evsel->tp_format;
PyObject *handler, *context, *t, *obj = NULL, *callchain;
PyObject *dict = NULL, *all_entries_dict = NULL;
static char handler_name[256];
@@ -1494,34 +1495,40 @@ static void _free_command_line(wchar_t **command_line, int num)
static int python_start_script(const char *script, int argc, const char **argv)
{
struct tables *tables = &tables_global;
+ PyMODINIT_FUNC (*initfunc)(void);
#if PY_MAJOR_VERSION < 3
const char **command_line;
#else
wchar_t **command_line;
#endif
- char buf[PATH_MAX];
+ /*
+ * Use a non-const name variable to cope with python 2.6's
+ * PyImport_AppendInittab prototype
+ */
+ char buf[PATH_MAX], name[19] = "perf_trace_context";
int i, err = 0;
FILE *fp;
#if PY_MAJOR_VERSION < 3
+ initfunc = initperf_trace_context;
command_line = malloc((argc + 1) * sizeof(const char *));
command_line[0] = script;
for (i = 1; i < argc + 1; i++)
command_line[i] = argv[i - 1];
#else
+ initfunc = PyInit_perf_trace_context;
command_line = malloc((argc + 1) * sizeof(wchar_t *));
command_line[0] = Py_DecodeLocale(script, NULL);
for (i = 1; i < argc + 1; i++)
command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
#endif
+ PyImport_AppendInittab(name, initfunc);
Py_Initialize();
#if PY_MAJOR_VERSION < 3
- initperf_trace_context();
PySys_SetArgv(argc + 1, (char **)command_line);
#else
- PyInit_perf_trace_context();
PySys_SetArgv(argc + 1, command_line);
#endif
@@ -1590,7 +1597,7 @@ static int python_stop_script(void)
static int python_generate_script(struct tep_handle *pevent, const char *outfile)
{
- struct tep_event_format *event = NULL;
+ struct tep_event *event = NULL;
struct tep_format_field *f;
char fname[PATH_MAX];
int not_first, count;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7d2c8ce6cfad..c764bbc91009 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -13,6 +13,8 @@
#include "evlist.h"
#include "evsel.h"
#include "memswap.h"
+#include "map.h"
+#include "symbol.h"
#include "session.h"
#include "tool.h"
#include "sort.h"
@@ -23,7 +25,9 @@
#include "auxtrace.h"
#include "thread.h"
#include "thread-stack.h"
+#include "sample-raw.h"
#include "stat.h"
+#include "arch/common.h"
static int perf_session__deliver_event(struct perf_session *session,
union perf_event *event,
@@ -125,7 +129,8 @@ struct perf_session *perf_session__new(struct perf_data *data,
session->tool = tool;
INIT_LIST_HEAD(&session->auxtrace_index);
machines__init(&session->machines);
- ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
+ ordered_events__init(&session->ordered_events,
+ ordered_events__deliver_event, NULL);
if (data) {
if (perf_data__open(data))
@@ -145,11 +150,16 @@ struct perf_session *perf_session__new(struct perf_data *data,
perf_session__set_id_hdr_size(session);
perf_session__set_comm_exec(session);
}
+
+ perf_evlist__init_trace_event_sample_raw(session->evlist);
}
} else {
session->machines.host.env = &perf_env;
}
+ session->machines.host.single_address_space =
+ perf_env__single_address_space(session->machines.host.env);
+
if (!data || perf_data__is_write(data)) {
/*
* In O_RDONLY mode this will be performed when reading the
@@ -371,6 +381,10 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->itrace_start = perf_event__process_itrace_start;
if (tool->context_switch == NULL)
tool->context_switch = perf_event__process_switch;
+ if (tool->ksymbol == NULL)
+ tool->ksymbol = perf_event__process_ksymbol;
+ if (tool->bpf_event == NULL)
+ tool->bpf_event = perf_event__process_bpf_event;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -689,7 +703,10 @@ static void perf_event__auxtrace_error_swap(union perf_event *event,
event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu);
event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid);
event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid);
+ event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt);
event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
+ if (event->auxtrace_error.fmt)
+ event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
}
static void perf_event__thread_map_swap(union perf_event *event,
@@ -1060,6 +1077,8 @@ static void dump_event(struct perf_evlist *evlist, union perf_event *event,
file_offset, event->header.size, event->header.type);
trace_event(event);
+ if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
+ evlist->trace_event_sample_raw(evlist, event, sample);
if (sample)
perf_evlist__print_tstamp(evlist, event, sample);
@@ -1183,6 +1202,13 @@ static int deliver_sample_value(struct perf_evlist *evlist,
return 0;
}
+ /*
+ * There's no reason to deliver sample
+ * for zero period, bail out.
+ */
+ if (!sample->period)
+ return 0;
+
return tool->sample(tool, event, sample, sid->evsel, machine);
}
@@ -1300,6 +1326,10 @@ static int machines__deliver_event(struct machines *machines,
case PERF_RECORD_SWITCH:
case PERF_RECORD_SWITCH_CPU_WIDE:
return tool->context_switch(tool, event, sample, machine);
+ case PERF_RECORD_KSYMBOL:
+ return tool->ksymbol(tool, event, sample, machine);
+ case PERF_RECORD_BPF_EVENT:
+ return tool->bpf_event(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
@@ -1522,6 +1552,13 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
return machine__findnew_thread(&session->machines.host, -1, pid);
}
+/*
+ * Threads are identified by pid and tid, and the idle task has pid == tid == 0.
+ * So here a single thread is created for that, but actually there is a separate
+ * idle task per cpu, so there should be one 'struct thread' per cpu, but there
+ * is only 1. That causes problems for some tools, requiring workarounds. For
+ * example get_idle_thread() in builtin-sched.c, or thread_stack__per_cpu().
+ */
int perf_session__register_idle_thread(struct perf_session *session)
{
struct thread *thread;
@@ -1808,38 +1845,35 @@ fetch_mmaped_event(struct perf_session *session,
#define NUM_MMAPS 128
#endif
-static int __perf_session__process_events(struct perf_session *session,
- u64 data_offset, u64 data_size,
- u64 file_size)
+struct reader { /* Describes the part of a file that holds the events */
+ int fd; /* File descriptor the events are mmap()ed from */
+ u64 data_size; /* Size of the event data in bytes */
+ u64 data_offset; /* File offset at which the event data starts */
+};
+
+static int
+reader__process_events(struct reader *rd, struct perf_session *session,
+ struct ui_progress *prog)
{
- struct ordered_events *oe = &session->ordered_events;
- struct perf_tool *tool = session->tool;
- int fd = perf_data__fd(session->data);
+ u64 data_size = rd->data_size;
u64 head, page_offset, file_offset, file_pos, size;
- int err, mmap_prot, mmap_flags, map_idx = 0;
+ int err = 0, mmap_prot, mmap_flags, map_idx = 0;
size_t mmap_size;
char *buf, *mmaps[NUM_MMAPS];
union perf_event *event;
- struct ui_progress prog;
s64 skip;
- perf_tool__fill_defaults(tool);
-
- page_offset = page_size * (data_offset / page_size);
+ page_offset = page_size * (rd->data_offset / page_size);
file_offset = page_offset;
- head = data_offset - page_offset;
+ head = rd->data_offset - page_offset;
- if (data_size == 0)
- goto out;
-
- if (data_offset + data_size < file_size)
- file_size = data_offset + data_size;
+ ui_progress__init_size(prog, data_size, "Processing events...");
- ui_progress__init_size(&prog, file_size, "Processing events...");
+ data_size += rd->data_offset;
mmap_size = MMAP_SIZE;
- if (mmap_size > file_size) {
- mmap_size = file_size;
+ if (mmap_size > data_size) {
+ mmap_size = data_size;
session->one_mmap = true;
}
@@ -1853,12 +1887,12 @@ static int __perf_session__process_events(struct perf_session *session,
mmap_flags = MAP_PRIVATE;
}
remap:
- buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd,
+ buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd,
file_offset);
if (buf == MAP_FAILED) {
pr_err("failed to mmap file\n");
err = -errno;
- goto out_err;
+ goto out;
}
mmaps[map_idx] = buf;
map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
@@ -1890,7 +1924,7 @@ more:
file_offset + head, event->header.size,
event->header.type);
err = -EINVAL;
- goto out_err;
+ goto out;
}
if (skip)
@@ -1899,15 +1933,40 @@ more:
head += size;
file_pos += size;
- ui_progress__update(&prog, size);
+ ui_progress__update(prog, size);
if (session_done())
goto out;
- if (file_pos < file_size)
+ if (file_pos < data_size)
goto more;
out:
+ return err;
+}
+
+static int __perf_session__process_events(struct perf_session *session)
+{
+ struct reader rd = {
+ .fd = perf_data__fd(session->data),
+ .data_size = session->header.data_size,
+ .data_offset = session->header.data_offset,
+ };
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ struct ui_progress prog;
+ int err;
+
+ perf_tool__fill_defaults(tool);
+
+ if (rd.data_size == 0)
+ return -1;
+
+ ui_progress__init_size(&prog, rd.data_size, "Processing events...");
+
+ err = reader__process_events(&rd, session, &prog);
+ if (err)
+ goto out_err;
/* do the final flush for ordered samples */
err = ordered_events__flush(oe, OE_FLUSH__FINAL);
if (err)
@@ -1932,20 +1991,13 @@ out_err:
int perf_session__process_events(struct perf_session *session)
{
- u64 size = perf_data__size(session->data);
- int err;
-
if (perf_session__register_idle_thread(session) < 0)
return -ENOMEM;
- if (!perf_data__is_pipe(session->data))
- err = __perf_session__process_events(session,
- session->header.data_offset,
- session->header.data_size, size);
- else
- err = __perf_session__process_pipe_events(session);
+ if (perf_data__is_pipe(session->data))
+ return __perf_session__process_pipe_events(session);
- return err;
+ return __perf_session__process_events(session);
}
bool perf_session__has_traces(struct perf_session *session, const char *msg)
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 63f758c655d5..5b5a167b43ce 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,5 +1,3 @@
-#!/usr/bin/python
-
from os import getenv
from subprocess import Popen, PIPE
from re import sub
@@ -17,6 +15,8 @@ if cc == "clang":
vars[var] = sub("-mcet", "", vars[var])
if not clang_has_option("-fcf-protection"):
vars[var] = sub("-fcf-protection", "", vars[var])
+ if not clang_has_option("-fstack-clash-protection"):
+ vars[var] = sub("-fstack-clash-protection", "", vars[var])
from distutils.core import setup, Extension
@@ -53,9 +53,14 @@ ext_sources = [f.strip() for f in open('util/python-ext-sources')
# use full paths with source files
ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
+extra_libraries = []
+if '-DHAVE_LIBNUMA_SUPPORT' in cflags:
+ extra_libraries = [ 'numa' ]
+
perf = Extension('perf',
sources = ext_sources,
include_dirs = ['util/include'],
+ libraries = extra_libraries,
extra_compile_args = cflags,
extra_objects = [libtraceevent, libapikfs],
)
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index f96c005b3c41..d2299e912e59 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -6,6 +6,7 @@
#include "sort.h"
#include "hist.h"
#include "comm.h"
+#include "map.h"
#include "symbol.h"
#include "thread.h"
#include "evsel.h"
@@ -13,6 +14,7 @@
#include "strlist.h"
#include <traceevent/event-parse.h>
#include "mem-events.h"
+#include "annotate.h"
#include <linux/kernel.h>
regex_t parent_regex;
@@ -36,7 +38,7 @@ enum sort_mode sort__mode = SORT_MODE__NORMAL;
* -t, --field-separator
*
* option, that uses a special separator character and don't pad with spaces,
- * replacing all occurances of this separator in symbol names (and other
+ * replacing all occurrences of this separator in symbol names (and other
* output) with a '.' character, that thus it's the only non valid separator.
*/
static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
@@ -229,8 +231,14 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
if (sym_l == sym_r)
return 0;
- if (sym_l->inlined || sym_r->inlined)
- return strcmp(sym_l->name, sym_r->name);
+ if (sym_l->inlined || sym_r->inlined) {
+ int ret = strcmp(sym_l->name, sym_r->name);
+
+ if (ret)
+ return ret;
+ if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start))
+ return 0;
+ }
if (sym_l->start != sym_r->start)
return (int64_t)(sym_r->start - sym_l->start);
@@ -422,6 +430,57 @@ struct sort_entry sort_srcline_to = {
.se_width_idx = HISTC_SRCLINE_TO,
};
+/*
+ * Render the "IPC [IPC Coverage]" column for a hist entry.
+ *
+ * An entry without a resolved symbol prints a lone "-".  Otherwise the cell
+ * is "<ipc> [<coverage>%]", computed from the symbol's annotation counters:
+ * ipc = hit_insn / hit_cycles and coverage = cover_insn * 100 / total_insn,
+ * each left at 0 when its divisor is 0.  The text is left-justified in @width
+ * columns and the return value is whatever repsep_snprintf() returns.
+ */
+static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf,
+					size_t size, unsigned int width)
+{
+	struct symbol *sym = he->ms.sym;
+	struct annotation *notes;
+	double ipc = 0.0;
+	double coverage = 0.0;
+	char cell[64];
+
+	if (sym == NULL)
+		return repsep_snprintf(bf, size, "%-*s", width, "-");
+
+	notes = symbol__annotation(sym);
+
+	/* Both ratios keep their 0.0 default when the denominator is 0. */
+	if (notes->hit_cycles != 0)
+		ipc = notes->hit_insn / (double)notes->hit_cycles;
+	if (notes->total_insn != 0)
+		coverage = notes->cover_insn * 100.0 / (double)notes->total_insn;
+
+	snprintf(cell, sizeof(cell), "%-5.2f [%5.1f%%]", ipc, coverage);
+	return repsep_snprintf(bf, size, "%-*s", width, cell);
+}
+
+/*
+ * Sort entry behind the "ipc_lbr" key of the branch-stack sort dimensions.
+ * Entries are ordered with the regular symbol comparison; only the column
+ * text (IPC and IPC coverage) is specific to this entry.
+ */
+struct sort_entry sort_sym_ipc = {
+ .se_header = "IPC [IPC Coverage]",
+ .se_cmp = sort__sym_cmp,
+ .se_snprintf = hist_entry__sym_ipc_snprintf,
+ .se_width_idx = HISTC_SYMBOL_IPC,
+};
+
+/*
+ * Placeholder renderer for the "IPC [IPC Coverage]" column: the hist entry
+ * is ignored and "-" is printed in place of both the IPC and the coverage
+ * value, left-justified in @width columns.  Returns what repsep_snprintf()
+ * returns.
+ */
+static int hist_entry__sym_ipc_null_snprintf(struct hist_entry *he
+					     __maybe_unused,
+					     char *bf, size_t size,
+					     unsigned int width)
+{
+	char placeholder[64];
+
+	snprintf(placeholder, sizeof(placeholder), "%-5s %2s", "-", "-");
+
+	return repsep_snprintf(bf, size, "%-*s", width, placeholder);
+}
+
+/*
+ * Sort entry behind the "ipc_null" key of the common sort dimensions.  It
+ * shares header, comparison and column width index with sort_sym_ipc but
+ * prints placeholder dashes instead of computed values.
+ */
+struct sort_entry sort_sym_ipc_null = {
+ .se_header = "IPC [IPC Coverage]",
+ .se_cmp = sort__sym_cmp,
+ .se_snprintf = hist_entry__sym_ipc_null_snprintf,
+ .se_width_idx = HISTC_SYMBOL_IPC,
+};
+
/* --sort srcfile */
static char no_srcfile[1];
@@ -1574,6 +1633,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
+ DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
};
#undef DIM
@@ -1591,6 +1651,7 @@ static struct sort_dimension bstack_sort_dimensions[] = {
DIM(SORT_CYCLES, "cycles", sort_cycles),
DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
+ DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc),
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a97cf8e6be86..2fbee0b1011c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -9,7 +9,8 @@
#include <linux/list.h>
#include "cache.h"
#include <linux/rbtree.h>
-#include "symbol.h"
+#include "map_symbol.h"
+#include "symbol_conf.h"
#include "string.h"
#include "callchain.h"
#include "values.h"
@@ -145,8 +146,8 @@ struct hist_entry {
union {
/* this is for hierarchical entry structure */
struct {
- struct rb_root hroot_in;
- struct rb_root hroot_out;
+ struct rb_root_cached hroot_in;
+ struct rb_root_cached hroot_out;
}; /* non-leaf entries */
struct rb_root sorted_chain; /* leaf entry has callchains */
};
@@ -229,6 +230,7 @@ enum sort_type {
SORT_SYM_SIZE,
SORT_DSO_SIZE,
SORT_CGROUP_ID,
+ SORT_SYM_IPC_NULL,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
@@ -242,6 +244,7 @@ enum sort_type {
SORT_CYCLES,
SORT_SRCLINE_FROM,
SORT_SRCLINE_TO,
+ SORT_SYM_IPC,
/* memory mode specific sort keys */
__SORT_MEMORY_MODE,
diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c
new file mode 100644
index 000000000000..fcc8630f6dff
--- /dev/null
+++ b/tools/perf/util/srccode.c
@@ -0,0 +1,186 @@
+/*
+ * Manage printing of source lines
+ * Copyright (c) 2017, Intel Corporation.
+ * Author: Andi Kleen
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "linux/list.h"
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+#include <string.h>
+#include "srccode.h"
+#include "debug.h"
+#include "util.h"
+
+#define MAXSRCCACHE (32*1024*1024)
+#define MAXSRCFILES 64
+#define SRC_HTAB_SZ 64
+
+/* One cached source file: its mmap()ed contents plus an index of line starts. */
+struct srcfile {
+ /* link in the srcfile_htab[] bucket selected by hashing fn */
+ struct hlist_node hash_nd;
+ /* link in srcfile_list; entries are moved to the front when looked up */
+ struct list_head nd;
+ /* strdup()ed file name, used as the lookup key */
+ char *fn;
+ /* calloc()ed array of numlines pointers into map, one per line start */
+ char **lines;
+ /* read-only mapping of the file contents */
+ char *map;
+ /* number of lines counted in the mapping */
+ unsigned numlines;
+ /* file size in bytes as reported by fstat() */
+ size_t maplen;
+};
+
+/* Hash table of cached files, indexed by shash(file name) % SRC_HTAB_SZ. */
+static struct hlist_head srcfile_htab[SRC_HTAB_SZ];
+/* All cached files; lookups move entries to the head, eviction takes the tail. */
+static LIST_HEAD(srcfile_list);
+/* Sum of maplen over all cached files, compared against MAXSRCCACHE. */
+static long map_total_sz;
+/* Number of cached files, compared against MAXSRCFILES. */
+static int num_srcfiles;
+
+/*
+ * Hash the NUL-terminated byte string @s: multiplicative hash with factor
+ * 65599, with the upper half folded into the lower half at the end.
+ */
+static unsigned shash(unsigned char *s)
+{
+	const unsigned char *c;
+	unsigned hash = 0;
+
+	for (c = s; *c != '\0'; c++)
+		hash = hash * 65599 + *c;
+
+	return hash ^ (hash >> 16);
+}
+
+/*
+ * Count the lines in the @maplen bytes at @map.
+ *
+ * Every '\n' ends a line; trailing bytes after the last '\n' (or a buffer
+ * with no '\n' at all) count as one more, unterminated, line.  An empty
+ * buffer has 0 lines.
+ */
+static int countlines(char *map, int maplen)
+{
+	char *end = map + maplen;
+	char *p = map;
+	int numl = 0;
+
+	/*
+	 * Each iteration consumes one line.  The memchr() result is kept in
+	 * its own variable so that a NULL pointer is never compared against
+	 * 'end' with a relational operator.
+	 */
+	while (p < end) {
+		char *nl = memchr(p, '\n', end - p);
+
+		numl++;
+		if (!nl)
+			break;	/* unterminated final line */
+		p = nl + 1;
+	}
+	return numl;
+}
+
+/*
+ * Store the start of each line of the @maplen bytes at @map into @lines.
+ *
+ * At most @maxline entries are written; entries beyond the number of lines
+ * actually present are left untouched.  Nothing is written for an empty
+ * buffer or when @maxline is 0.
+ */
+static void fill_lines(char **lines, int maxline, char *map, int maplen)
+{
+	char *end = map + maplen;
+	char *p = map;
+	int l = 0;
+
+	/*
+	 * Record the current line start, then skip past its newline.  The
+	 * bound on 'l' is checked before every store, so a file whose last
+	 * line lacks a newline can never write past lines[maxline - 1].
+	 */
+	while (p < end && l < maxline) {
+		char *nl;
+
+		lines[l++] = p;
+		nl = memchr(p, '\n', end - p);
+		if (!nl)
+			break;	/* unterminated final line already recorded */
+		p = nl + 1;
+	}
+}
+
+/*
+ * Remove @sf from the cache and release it: unlink it from its hash bucket
+ * and from srcfile_list, give back its share of the byte and entry
+ * accounting, unmap the file contents and free the entry.
+ */
+static void free_srcfile(struct srcfile *sf)
+{
+	/* Make the entry unreachable through the cache first. */
+	hlist_del(&sf->hash_nd);
+	list_del(&sf->nd);
+
+	num_srcfiles--;
+	map_total_sz -= sf->maplen;
+
+	munmap(sf->map, sf->maplen);
+	free(sf->fn);
+	free(sf->lines);
+	free(sf);
+}
+
+/*
+ * Look up @fn in the source file cache, loading the file on a miss.
+ *
+ * A hit moves the entry to the front of srcfile_list.  On a miss, entries
+ * are evicted from the tail of that list while either budget (MAXSRCFILES
+ * entries, MAXSRCCACHE mapped bytes) is exceeded; the file is then mapped
+ * read-only, its line starts are indexed, and the new entry is added to the
+ * front of the list and to its hash bucket.
+ *
+ * Returns the cache entry, which stays owned by the cache, or NULL when the
+ * file cannot be opened, stat()ed or mapped, or when memory runs out.  The
+ * file descriptor is closed on every path.
+ */
+static struct srcfile *find_srcfile(char *fn)
+{
+	struct stat st;
+	struct srcfile *h;
+	int fd;
+	unsigned long sz;
+	unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ;
+
+	hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) {
+		if (!strcmp(fn, h->fn)) {
+			/* Move to front */
+			list_del(&h->nd);
+			list_add(&h->nd, &srcfile_list);
+			return h;
+		}
+	}
+
+	/* Evict from the tail while over budget and the cache is not empty */
+	while ((num_srcfiles > MAXSRCFILES || map_total_sz > MAXSRCCACHE) &&
+	       !list_empty(&srcfile_list)) {
+		h = list_entry(srcfile_list.prev, struct srcfile, nd);
+		free_srcfile(h);
+	}
+
+	fd = open(fn, O_RDONLY);
+	if (fd < 0) {
+		pr_debug("cannot open source file %s\n", fn);
+		return NULL;
+	}
+	if (fstat(fd, &st) < 0) {
+		pr_debug("cannot open source file %s\n", fn);
+		goto out_fd;
+	}
+
+	h = malloc(sizeof(*h));
+	if (!h)
+		goto out_fd;
+
+	h->fn = strdup(fn);
+	if (!h->fn)
+		goto out_h;
+
+	h->maplen = st.st_size;
+	/* Round the mapping length up to a whole number of pages */
+	sz = (h->maplen + page_size - 1) & ~(page_size - 1);
+	h->map = mmap(NULL, sz, PROT_READ, MAP_SHARED, fd, 0);
+	/* The mapping does not need the descriptor any more */
+	close(fd);
+	fd = -1;
+	if (h->map == MAP_FAILED) {
+		pr_debug("cannot mmap source file %s\n", fn);
+		goto out_fn;
+	}
+	h->numlines = countlines(h->map, h->maplen);
+	h->lines = calloc(h->numlines, sizeof(char *));
+	if (!h->lines)
+		goto out_map;
+	fill_lines(h->lines, h->numlines, h->map, h->maplen);
+	list_add(&h->nd, &srcfile_list);
+	hlist_add_head(&h->hash_nd, &srcfile_htab[hval]);
+	map_total_sz += h->maplen;
+	num_srcfiles++;
+	return h;
+
+out_map:
+	munmap(h->map, sz);
+out_fn:
+	free(h->fn);
+out_h:
+	free(h);
+out_fd:
+	/* Still open only when we failed before the mmap() step */
+	if (fd >= 0)
+		close(fd);
+	return NULL;
+}
+
+/*
+ * Return a pointer to the start of line @line (1-based) of source file @fn
+ * and store the line's length, excluding the newline, in @lenp.
+ *
+ * Result is not 0 terminated.  It points into the cache's mapping of the
+ * file, so it is only valid until a later lookup evicts that file.
+ *
+ * Returns NULL, leaving @lenp untouched, when the file cannot be loaded or
+ * @line is outside the file.
+ */
+char *find_sourceline(char *fn, unsigned line, int *lenp)
+{
+	char *l, *p, *end;
+	struct srcfile *sf = find_srcfile(fn);
+
+	if (!sf)
+		return NULL;
+	/* Line 0 wraps around to UINT_MAX and fails the range check below */
+	line--;
+	if (line >= sf->numlines)
+		return NULL;
+	l = sf->lines[line];
+	if (!l)
+		return NULL;
+	end = sf->map + sf->maplen;
+	p = memchr(l, '\n', end - l);
+	/* A last line without a newline runs up to the end of the file */
+	if (!p)
+		p = end;
+	*lenp = p - l;
+	return l;
+}
diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h
new file mode 100644
index 000000000000..1b5ed769779c
--- /dev/null
+++ b/tools/perf/util/srccode.h
@@ -0,0 +1,20 @@
+#ifndef SRCCODE_H
+#define SRCCODE_H 1
+
+/* The source position remembered in a srccode_state: a file name and a line. */
+struct srccode_state {
+	char *srcfile;
+	unsigned line;
+};
+
+/* Reset @state to "no position": NULL file name and line 0. */
+static inline void srccode_state_init(struct srccode_state *state)
+{
+	*state = (struct srccode_state){ .srcfile = NULL, .line = 0 };
+}
+
+void srccode_state_free(struct srccode_state *state);
+
+/* Result is not 0 terminated */
+char *find_sourceline(char *fn, unsigned line, int *lenp);
+
+#endif
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index e767c4a9d4d2..10ca1533937e 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -104,7 +104,7 @@ static struct symbol *new_inline_sym(struct dso *dso,
} else {
/* create a fake symbol for the inline frame */
inline_sym = symbol__new(base_sym ? base_sym->start : 0,
- base_sym ? base_sym->end : 0,
+ base_sym ? (base_sym->end - base_sym->start) : 0,
base_sym ? base_sym->binding : 0,
base_sym ? base_sym->type : 0,
funcname);
@@ -548,6 +548,34 @@ out:
return srcline;
}
+/*
+ * Returns filename and fills in line number in line.
+ *
+ * On success the dso's addr2line failure counter is reset to 0.  NULL is
+ * returned when the dso has no srcline support, has no name, or the
+ * addr2line() lookup fails; in that case a non-zero failure counter is
+ * incremented and, once it exceeds A2L_FAIL_LIMIT, srcline lookups are
+ * switched off for the dso and its addr2line state is freed.
+ * NOTE(review): ownership of the returned string is not visible here --
+ * confirm against addr2line() before freeing it in callers.
+ */
+char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line)
+{
+	char *file = NULL;
+	const char *dso_name;
+
+	if (dso->has_srcline) {
+		dso_name = dso__name(dso);
+		if (dso_name != NULL &&
+		    addr2line(dso_name, addr, &file, line, dso, true, NULL, NULL)) {
+			dso->a2l_fails = 0;
+			return file;
+		}
+	}
+
+	/* Failure path: account for it and give up on this dso eventually */
+	if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+		dso->has_srcline = 0;
+		dso__free_a2l(dso);
+	}
+
+	return NULL;
+}
+
void free_srcline(char *srcline)
{
if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0)
@@ -566,11 +594,12 @@ struct srcline_node {
struct rb_node rb_node;
};
-void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
+void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline)
{
- struct rb_node **p = &tree->rb_node;
+ struct rb_node **p = &tree->rb_root.rb_node;
struct rb_node *parent = NULL;
struct srcline_node *i, *node;
+ bool leftmost = true;
node = zalloc(sizeof(struct srcline_node));
if (!node) {
@@ -586,16 +615,18 @@ void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
i = rb_entry(parent, struct srcline_node, rb_node);
if (addr < i->addr)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&node->rb_node, parent, p);
- rb_insert_color(&node->rb_node, tree);
+ rb_insert_color_cached(&node->rb_node, tree, leftmost);
}
-char *srcline__tree_find(struct rb_root *tree, u64 addr)
+char *srcline__tree_find(struct rb_root_cached *tree, u64 addr)
{
- struct rb_node *n = tree->rb_node;
+ struct rb_node *n = tree->rb_root.rb_node;
while (n) {
struct srcline_node *i = rb_entry(n, struct srcline_node,
@@ -612,15 +643,15 @@ char *srcline__tree_find(struct rb_root *tree, u64 addr)
return NULL;
}
-void srcline__tree_delete(struct rb_root *tree)
+void srcline__tree_delete(struct rb_root_cached *tree)
{
struct srcline_node *pos;
- struct rb_node *next = rb_first(tree);
+ struct rb_node *next = rb_first_cached(tree);
while (next) {
pos = rb_entry(next, struct srcline_node, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, tree);
+ rb_erase_cached(&pos->rb_node, tree);
free_srcline(pos->srcline);
zfree(&pos);
}
@@ -654,28 +685,32 @@ void inline_node__delete(struct inline_node *node)
free(node);
}
-void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines)
+void inlines__tree_insert(struct rb_root_cached *tree,
+ struct inline_node *inlines)
{
- struct rb_node **p = &tree->rb_node;
+ struct rb_node **p = &tree->rb_root.rb_node;
struct rb_node *parent = NULL;
const u64 addr = inlines->addr;
struct inline_node *i;
+ bool leftmost = true;
while (*p != NULL) {
parent = *p;
i = rb_entry(parent, struct inline_node, rb_node);
if (addr < i->addr)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&inlines->rb_node, parent, p);
- rb_insert_color(&inlines->rb_node, tree);
+ rb_insert_color_cached(&inlines->rb_node, tree, leftmost);
}
-struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
+struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr)
{
- struct rb_node *n = tree->rb_node;
+ struct rb_node *n = tree->rb_root.rb_node;
while (n) {
struct inline_node *i = rb_entry(n, struct inline_node,
@@ -692,15 +727,15 @@ struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
return NULL;
}
-void inlines__tree_delete(struct rb_root *tree)
+void inlines__tree_delete(struct rb_root_cached *tree)
{
struct inline_node *pos;
- struct rb_node *next = rb_first(tree);
+ struct rb_node *next = rb_first_cached(tree);
while (next) {
pos = rb_entry(next, struct inline_node, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, tree);
+ rb_erase_cached(&pos->rb_node, tree);
inline_node__delete(pos);
}
}
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index b2bb5502fd62..b11a0aaaa676 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -16,13 +16,14 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
bool show_sym, bool show_addr, bool unwind_inlines,
u64 ip);
void free_srcline(char *srcline);
+char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line);
/* insert the srcline into the DSO, which will take ownership */
-void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline);
+void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline);
/* find previously inserted srcline */
-char *srcline__tree_find(struct rb_root *tree, u64 addr);
+char *srcline__tree_find(struct rb_root_cached *tree, u64 addr);
/* delete all srclines within the tree */
-void srcline__tree_delete(struct rb_root *tree);
+void srcline__tree_delete(struct rb_root_cached *tree);
#define SRCLINE_UNKNOWN ((char *) "??:0")
@@ -45,10 +46,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
void inline_node__delete(struct inline_node *node);
/* insert the inline node list into the DSO, which will take ownership */
-void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines);
+void inlines__tree_insert(struct rb_root_cached *tree,
+ struct inline_node *inlines);
/* find previously inserted inline node list */
-struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr);
+struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr);
/* delete all nodes within the tree of inline_node s */
-void inlines__tree_delete(struct rb_root *tree);
+void inlines__tree_delete(struct rb_root_cached *tree);
#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index e7b4c44ebb62..6d043c78f3c2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -2,6 +2,7 @@
#include <inttypes.h>
#include <linux/time64.h>
#include <math.h>
+#include "color.h"
#include "evlist.h"
#include "evsel.h"
#include "stat.h"
@@ -59,6 +60,15 @@ static void print_noise(struct perf_stat_config *config,
print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg);
}
+/*
+ * Print the cgroup column for @evsel to config->output: the CSV separator
+ * followed by the cgroup name, or by an empty string when the event has no
+ * cgroup.  Nothing is printed when nr_cgroups is 0.
+ */
+static void print_cgroup(struct perf_stat_config *config, struct perf_evsel *evsel)
+{
+	const char *cgrp_name;
+
+	if (!nr_cgroups)
+		return;
+
+	cgrp_name = evsel->cgrp ? evsel->cgrp->name : "";
+	fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+}
+
+
static void aggr_printout(struct perf_stat_config *config,
struct perf_evsel *evsel, int id, int nr)
{
@@ -336,8 +346,7 @@ static void abs_printout(struct perf_stat_config *config,
fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel));
- if (evsel->cgrp)
- fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name);
+ print_cgroup(config, evsel);
}
static bool is_mixed_hw_group(struct perf_evsel *counter)
@@ -431,9 +440,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
config->csv_output ? 0 : -25,
perf_evsel__name(counter));
- if (counter->cgrp)
- fprintf(config->output, "%s%s",
- config->csv_sep, counter->cgrp->name);
+ print_cgroup(config, counter);
if (!config->csv_output)
pm(config, &os, NULL, NULL, "", 0);
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 8ad32763cfff..83d8094be4fe 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -168,7 +168,7 @@ static void reset_stat(struct runtime_stat *st)
struct rb_node *pos, *next;
rblist = &st->value_list;
- next = rb_first(&rblist->entries);
+ next = rb_first_cached(&rblist->entries);
while (next) {
pos = next;
next = rb_next(pos);
@@ -209,12 +209,12 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
int cpu, struct runtime_stat *st)
{
int ctx = evsel_context(counter);
+ u64 count_ns = count;
count *= counter->scale;
- if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||
- perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK))
- update_runtime_stat(st, STAT_NSECS, 0, cpu, count);
+ if (perf_evsel__is_clock(counter))
+ update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index 9005fbe0780e..23092fd6451d 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -109,7 +109,6 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
return ret;
}
len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
- va_end(ap_saved);
if (len > strbuf_avail(sb)) {
pr_debug("this should not happen, your vsnprintf is broken");
va_end(ap_saved);
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index d58f1e08b170..7e82c71dcc42 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -57,7 +57,7 @@ static inline unsigned int strlist__nr_entries(const struct strlist *slist)
/* For strlist iteration */
static inline struct str_node *strlist__first(struct strlist *slist)
{
- struct rb_node *rn = rb_first(&slist->rblist.entries);
+ struct rb_node *rn = rb_first_cached(&slist->rblist.entries);
return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
}
static inline struct str_node *strlist__next(struct str_node *sn)
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1cbada2dc6be..f735ee038713 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -334,7 +334,7 @@ static char *cpu_model(void)
if (file) {
while (fgets(buf, 255, file)) {
if (strstr(buf, "model name")) {
- strncpy(cpu_m, &buf[13], 255);
+ strlcpy(cpu_m, &buf[13], 255);
break;
}
}
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 29770ea61768..4ad106a5f2c0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -6,6 +6,8 @@
#include <unistd.h>
#include <inttypes.h>
+#include "map.h"
+#include "map_groups.h"
#include "symbol.h"
#include "demangle-java.h"
#include "demangle-rust.h"
@@ -19,6 +21,20 @@
#define EM_AARCH64 183 /* ARM 64 bit */
#endif
+#ifndef ELF32_ST_VISIBILITY
+#define ELF32_ST_VISIBILITY(o) ((o) & 0x03)
+#endif
+
+/* For ELF64 the definitions are the same. */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o) ELF32_ST_VISIBILITY (o)
+#endif
+
+/* How to extract information held in the st_other field. */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(val) ELF64_ST_VISIBILITY (val)
+#endif
+
typedef Elf64_Nhdr GElf_Nhdr;
#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
@@ -87,6 +103,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym)
return GELF_ST_TYPE(sym->st_info);
}
+/* Extract the symbol visibility (an STV_* value) from @sym's st_other field. */
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+	const uint8_t visibility = GELF_ST_VISIBILITY(sym->st_other);
+
+	return visibility;
+}
+
#ifndef STT_GNU_IFUNC
#define STT_GNU_IFUNC 10
#endif
@@ -111,7 +132,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
return elf_sym__type(sym) == STT_NOTYPE &&
sym->st_name != 0 &&
sym->st_shndx != SHN_UNDEF &&
- sym->st_shndx != SHN_ABS;
+ sym->st_shndx != SHN_ABS &&
+ elf_sym__visibility(sym) != STV_HIDDEN &&
+ elf_sym__visibility(sym) != STV_INTERNAL;
}
static bool elf_sym__filter(GElf_Sym *sym)
@@ -324,7 +347,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
plt_entry_size = 16;
break;
- default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */
+ case EM_SPARC:
+ plt_header_size = 48;
+ plt_entry_size = 12;
+ break;
+
+ case EM_SPARCV9:
+ plt_header_size = 128;
+ plt_entry_size = 32;
+ break;
+
+ default: /* FIXME: s390/alpha/mips/parisc/powerpc/sh/xtensa need to be checked */
plt_header_size = shdr_plt.sh_entsize;
plt_entry_size = shdr_plt.sh_entsize;
break;
@@ -1947,6 +1980,34 @@ void kcore_extract__delete(struct kcore_extract *kce)
}
#ifdef HAVE_GELF_GETNOTE_SUPPORT
+
+/*
+ * Shift the note's location address by the difference between @base_off and
+ * the base address recorded in the note itself.  No-op when @base_off is 0.
+ */
+static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off)
+{
+	if (base_off == 0)
+		return;
+
+	/* 64-bit notes keep their addresses in a64[], 32-bit ones in a32[] */
+	if (!tmp->bit32) {
+		tmp->addr.a64[SDT_NOTE_IDX_LOC] =
+			tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off -
+			tmp->addr.a64[SDT_NOTE_IDX_BASE];
+		return;
+	}
+
+	tmp->addr.a32[SDT_NOTE_IDX_LOC] =
+		tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off -
+		tmp->addr.a32[SDT_NOTE_IDX_BASE];
+}
+
+/*
+ * Rebase the note's reference counter address by subtracting
+ * (@base_addr - @base_off) from it.  Nothing is changed when @base_off is 0
+ * or when the note's reference counter field is 0.
+ */
+static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr,
+			      GElf_Addr base_off)
+{
+	if (!base_off)
+		return;
+
+	/*
+	 * Select the address width first: a 32-bit note whose reference
+	 * counter is 0 must not fall through to the 64-bit field.
+	 */
+	if (tmp->bit32) {
+		if (tmp->addr.a32[SDT_NOTE_IDX_REFCTR])
+			tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+	} else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR]) {
+		tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off);
+	}
+}
+
/**
* populate_sdt_note : Parse raw data and identify SDT note
* @elf: elf of the opened file
@@ -1964,7 +2025,6 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len,
const char *provider, *name, *args;
struct sdt_note *tmp = NULL;
GElf_Ehdr ehdr;
- GElf_Addr base_off = 0;
GElf_Shdr shdr;
int ret = -EINVAL;
@@ -2060,17 +2120,12 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len,
* base address in the description of the SDT note. If its different,
* then accordingly, adjust the note location.
*/
- if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) {
- base_off = shdr.sh_offset;
- if (base_off) {
- if (tmp->bit32)
- tmp->addr.a32[0] = tmp->addr.a32[0] + base_off -
- tmp->addr.a32[1];
- else
- tmp->addr.a64[0] = tmp->addr.a64[0] + base_off -
- tmp->addr.a64[1];
- }
- }
+ if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL))
+ sdt_adjust_loc(tmp, shdr.sh_offset);
+
+ /* Adjust reference counter offset */
+ if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL))
+ sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset);
list_add_tail(&tmp->note_list, sdt_notes);
return 0;
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 7119df77dc0b..17edbd4f6f85 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -3,6 +3,7 @@
#include "util.h"
#include <errno.h>
+#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index d188b7588152..758bf5f74e6e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -17,6 +17,7 @@
#include "util.h"
#include "debug.h"
#include "machine.h"
+#include "map.h"
#include "symbol.h"
#include "strlist.h"
#include "intlist.h"
@@ -163,7 +164,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
return arch__choose_best_symbol(syma, symb);
}
-void symbols__fixup_duplicate(struct rb_root *symbols)
+void symbols__fixup_duplicate(struct rb_root_cached *symbols)
{
struct rb_node *nd;
struct symbol *curr, *next;
@@ -171,7 +172,7 @@ void symbols__fixup_duplicate(struct rb_root *symbols)
if (symbol_conf.allow_aliases)
return;
- nd = rb_first(symbols);
+ nd = rb_first_cached(symbols);
while (nd) {
curr = rb_entry(nd, struct symbol, rb_node);
@@ -186,20 +187,20 @@ again:
continue;
if (choose_best_symbol(curr, next) == SYMBOL_A) {
- rb_erase(&next->rb_node, symbols);
+ rb_erase_cached(&next->rb_node, symbols);
symbol__delete(next);
goto again;
} else {
nd = rb_next(&curr->rb_node);
- rb_erase(&curr->rb_node, symbols);
+ rb_erase_cached(&curr->rb_node, symbols);
symbol__delete(curr);
}
}
}
-void symbols__fixup_end(struct rb_root *symbols)
+void symbols__fixup_end(struct rb_root_cached *symbols)
{
- struct rb_node *nd, *prevnd = rb_first(symbols);
+ struct rb_node *nd, *prevnd = rb_first_cached(symbols);
struct symbol *curr, *prev;
if (prevnd == NULL)
@@ -282,25 +283,27 @@ void symbol__delete(struct symbol *sym)
free(((void *)sym) - symbol_conf.priv_size);
}
-void symbols__delete(struct rb_root *symbols)
+void symbols__delete(struct rb_root_cached *symbols)
{
struct symbol *pos;
- struct rb_node *next = rb_first(symbols);
+ struct rb_node *next = rb_first_cached(symbols);
while (next) {
pos = rb_entry(next, struct symbol, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase(&pos->rb_node, symbols);
+ rb_erase_cached(&pos->rb_node, symbols);
symbol__delete(pos);
}
}
-void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
+void __symbols__insert(struct rb_root_cached *symbols,
+ struct symbol *sym, bool kernel)
{
- struct rb_node **p = &symbols->rb_node;
+ struct rb_node **p = &symbols->rb_root.rb_node;
struct rb_node *parent = NULL;
const u64 ip = sym->start;
struct symbol *s;
+ bool leftmost = true;
if (kernel) {
const char *name = sym->name;
@@ -318,26 +321,28 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
s = rb_entry(parent, struct symbol, rb_node);
if (ip < s->start)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&sym->rb_node, parent, p);
- rb_insert_color(&sym->rb_node, symbols);
+ rb_insert_color_cached(&sym->rb_node, symbols, leftmost);
}
-void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
{
__symbols__insert(symbols, sym, false);
}
-static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
+static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
{
struct rb_node *n;
if (symbols == NULL)
return NULL;
- n = symbols->rb_node;
+ n = symbols->rb_root.rb_node;
while (n) {
struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -353,9 +358,9 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
return NULL;
}
-static struct symbol *symbols__first(struct rb_root *symbols)
+static struct symbol *symbols__first(struct rb_root_cached *symbols)
{
- struct rb_node *n = rb_first(symbols);
+ struct rb_node *n = rb_first_cached(symbols);
if (n)
return rb_entry(n, struct symbol, rb_node);
@@ -363,9 +368,9 @@ static struct symbol *symbols__first(struct rb_root *symbols)
return NULL;
}
-static struct symbol *symbols__last(struct rb_root *symbols)
+static struct symbol *symbols__last(struct rb_root_cached *symbols)
{
- struct rb_node *n = rb_last(symbols);
+ struct rb_node *n = rb_last(&symbols->rb_root);
if (n)
return rb_entry(n, struct symbol, rb_node);
@@ -383,11 +388,12 @@ static struct symbol *symbols__next(struct symbol *sym)
return NULL;
}
-static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
+static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym)
{
- struct rb_node **p = &symbols->rb_node;
+ struct rb_node **p = &symbols->rb_root.rb_node;
struct rb_node *parent = NULL;
struct symbol_name_rb_node *symn, *s;
+ bool leftmost = true;
symn = container_of(sym, struct symbol_name_rb_node, sym);
@@ -396,19 +402,21 @@ static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
if (strcmp(sym->name, s->sym.name) < 0)
p = &(*p)->rb_left;
- else
+ else {
p = &(*p)->rb_right;
+ leftmost = false;
+ }
}
rb_link_node(&symn->rb_node, parent, p);
- rb_insert_color(&symn->rb_node, symbols);
+ rb_insert_color_cached(&symn->rb_node, symbols, leftmost);
}
-static void symbols__sort_by_name(struct rb_root *symbols,
- struct rb_root *source)
+static void symbols__sort_by_name(struct rb_root_cached *symbols,
+ struct rb_root_cached *source)
{
struct rb_node *nd;
- for (nd = rb_first(source); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) {
struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
symbols__insert_by_name(symbols, pos);
}
@@ -431,7 +439,7 @@ int symbol__match_symbol_name(const char *name, const char *str,
return arch__compare_symbol_names(name, str);
}
-static struct symbol *symbols__find_by_name(struct rb_root *symbols,
+static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols,
const char *name,
enum symbol_tag_include includes)
{
@@ -441,7 +449,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
if (symbols == NULL)
return NULL;
- n = symbols->rb_node;
+ n = symbols->rb_root.rb_node;
while (n) {
int cmp;
@@ -614,6 +622,7 @@ out:
static bool symbol__is_idle(const char *name)
{
const char * const idle_symbols[] = {
+ "arch_cpu_idle",
"cpu_idle",
"cpu_startup_entry",
"intel_idle",
@@ -643,7 +652,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
{
struct symbol *sym;
struct dso *dso = arg;
- struct rb_root *root = &dso->symbols;
+ struct rb_root_cached *root = &dso->symbols;
if (!symbol_type__filter(type))
return 0;
@@ -680,14 +689,14 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct
struct map *curr_map;
struct symbol *pos;
int count = 0;
- struct rb_root old_root = dso->symbols;
- struct rb_root *root = &dso->symbols;
- struct rb_node *next = rb_first(root);
+ struct rb_root_cached old_root = dso->symbols;
+ struct rb_root_cached *root = &dso->symbols;
+ struct rb_node *next = rb_first_cached(root);
if (!kmaps)
return -1;
- *root = RB_ROOT;
+ *root = RB_ROOT_CACHED;
while (next) {
char *module;
@@ -695,8 +704,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct
pos = rb_entry(next, struct symbol, rb_node);
next = rb_next(&pos->rb_node);
- rb_erase_init(&pos->rb_node, &old_root);
-
+ rb_erase_cached(&pos->rb_node, &old_root);
+ RB_CLEAR_NODE(&pos->rb_node);
module = strchr(pos->name, '\t');
if (module)
*module = '\0';
@@ -709,6 +718,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct
}
pos->start -= curr_map->start - curr_map->pgoff;
+ if (pos->end > curr_map->end)
+ pos->end = curr_map->end;
if (pos->end)
pos->end -= curr_map->start - curr_map->pgoff;
symbols__insert(&curr_map->dso->symbols, pos);
@@ -733,8 +744,8 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso,
struct map *curr_map = initial_map;
struct symbol *pos;
int count = 0, moved = 0;
- struct rb_root *root = &dso->symbols;
- struct rb_node *next = rb_first(root);
+ struct rb_root_cached *root = &dso->symbols;
+ struct rb_node *next = rb_first_cached(root);
int kernel_range = 0;
bool x86_64;
@@ -848,7 +859,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso,
}
add_symbol:
if (curr_map != initial_map) {
- rb_erase(&pos->rb_node, root);
+ rb_erase_cached(&pos->rb_node, root);
symbols__insert(&curr_map->dso->symbols, pos);
++moved;
} else
@@ -856,7 +867,7 @@ add_symbol:
continue;
discard_symbol:
- rb_erase(&pos->rb_node, root);
+ rb_erase_cached(&pos->rb_node, root);
symbol__delete(pos);
}
@@ -1537,17 +1548,6 @@ int dso__load(struct dso *dso, struct map *map)
dso->adjust_symbols = 0;
if (perfmap) {
- struct stat st;
-
- if (lstat(map_path, &st) < 0)
- goto out;
-
- if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) {
- pr_warning("File %s not owned by current user or root, "
- "ignoring it (use -f to override).\n", map_path);
- goto out;
- }
-
ret = dso__load_perf_map(map_path, dso);
dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
DSO_BINARY_TYPE__NOT_FOUND;
@@ -1680,11 +1680,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
{
struct maps *maps = &mg->maps;
struct map *map;
+ struct rb_node *node;
down_read(&maps->lock);
- for (map = maps__first(maps); map; map = map__next(map)) {
- if (map->dso && strcmp(map->dso->short_name, name) == 0)
+ for (node = maps->names.rb_node; node; ) {
+ int rc;
+
+ map = rb_entry(node, struct map, rb_node_name);
+
+ rc = strcmp(map->dso->short_name, name);
+ if (rc < 0)
+ node = node->rb_left;
+ else if (rc > 0)
+ node = node->rb_right;
+ else
+
goto out_unlock;
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f25fae4b5743..9a8fe012910a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -5,16 +5,13 @@
#include <linux/types.h>
#include <stdbool.h>
#include <stdint.h>
-#include "map.h"
-#include "../perf.h"
#include <linux/list.h>
#include <linux/rbtree.h>
#include <stdio.h>
-#include <byteswap.h>
-#include <libgen.h>
-#include "build-id.h"
-#include "event.h"
+#include "map_symbol.h"
+#include "branch.h"
#include "path.h"
+#include "symbol_conf.h"
#ifdef HAVE_LIBELF_SUPPORT
#include <libelf.h>
@@ -24,6 +21,10 @@
#include "dso.h"
+struct map;
+struct map_groups;
+struct option;
+
/*
* libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
* for newer versions we can use mmap to reduce memory usage:
@@ -63,11 +64,12 @@ struct symbol {
u8 ignore:1;
u8 inlined:1;
u8 arch_sym;
+ bool annotate2;
char name[0];
};
void symbol__delete(struct symbol *sym);
-void symbols__delete(struct rb_root *symbols);
+void symbols__delete(struct rb_root_cached *symbols);
/* symbols__for_each_entry - iterate over symbols (rb_root)
*
@@ -76,7 +78,7 @@ void symbols__delete(struct rb_root *symbols);
* @nd: the 'struct rb_node *' to use as a temporary storage
*/
#define symbols__for_each_entry(symbols, pos, nd) \
- for (nd = rb_first(symbols); \
+ for (nd = rb_first_cached(symbols); \
nd && (pos = rb_entry(nd, struct symbol, rb_node)); \
nd = rb_next(nd))
@@ -88,68 +90,6 @@ static inline size_t symbol__size(const struct symbol *sym)
struct strlist;
struct intlist;
-struct symbol_conf {
- unsigned short priv_size;
- bool try_vmlinux_path,
- init_annotation,
- force,
- ignore_vmlinux,
- ignore_vmlinux_buildid,
- show_kernel_path,
- use_modules,
- allow_aliases,
- sort_by_name,
- show_nr_samples,
- show_total_period,
- use_callchain,
- cumulate_callchain,
- show_branchflag_count,
- exclude_other,
- show_cpu_utilization,
- initialized,
- kptr_restrict,
- event_group,
- demangle,
- demangle_kernel,
- filter_relative,
- show_hist_headers,
- branch_callstack,
- has_filter,
- show_ref_callgraph,
- hide_unresolved,
- raw_trace,
- report_hierarchy,
- inline_name;
- const char *vmlinux_name,
- *kallsyms_name,
- *source_prefix,
- *field_sep;
- const char *default_guest_vmlinux_name,
- *default_guest_kallsyms,
- *default_guest_modules;
- const char *guestmount;
- const char *dso_list_str,
- *comm_list_str,
- *pid_list_str,
- *tid_list_str,
- *sym_list_str,
- *col_width_list_str,
- *bt_stop_list_str;
- struct strlist *dso_list,
- *comm_list,
- *sym_list,
- *dso_from_list,
- *dso_to_list,
- *sym_from_list,
- *sym_to_list,
- *bt_stop_list;
- struct intlist *pid_list,
- *tid_list;
- const char *symfs;
-};
-
-extern struct symbol_conf symbol_conf;
-
struct symbol_name_rb_node {
struct rb_node rb_node;
struct symbol sym;
@@ -176,19 +116,6 @@ struct ref_reloc_sym {
u64 unrelocated_addr;
};
-struct map_symbol {
- struct map *map;
- struct symbol *sym;
-};
-
-struct addr_map_symbol {
- struct map *map;
- struct symbol *sym;
- u64 addr;
- u64 al_addr;
- u64 phys_addr;
-};
-
struct branch_info {
struct addr_map_symbol from;
struct addr_map_symbol to;
@@ -308,10 +235,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
-void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel);
-void symbols__insert(struct rb_root *symbols, struct symbol *sym);
-void symbols__fixup_duplicate(struct rb_root *symbols);
-void symbols__fixup_end(struct rb_root *symbols);
+void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
+ bool kernel);
+void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
+void symbols__fixup_duplicate(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols);
void map_groups__fixup_end(struct map_groups *mg);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -379,12 +307,19 @@ int get_sdt_note_list(struct list_head *head, const char *target);
int cleanup_sdt_note_list(struct list_head *sdt_notes);
int sdt_notes__get_count(struct list_head *start);
+#define SDT_PROBES_SCN ".probes"
#define SDT_BASE_SCN ".stapsdt.base"
#define SDT_NOTE_SCN ".note.stapsdt"
#define SDT_NOTE_TYPE 3
#define SDT_NOTE_NAME "stapsdt"
#define NR_ADDR 3
+enum {
+ SDT_NOTE_IDX_LOC = 0,
+ SDT_NOTE_IDX_BASE,
+ SDT_NOTE_IDX_REFCTR,
+};
+
struct mem_info *mem_info__new(void);
struct mem_info *mem_info__get(struct mem_info *mi);
void mem_info__put(struct mem_info *mi);
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
new file mode 100644
index 000000000000..fffea68c1203
--- /dev/null
+++ b/tools/perf/util/symbol_conf.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL_CONF
+#define __PERF_SYMBOL_CONF 1
+
+#include <stdbool.h>
+
+struct strlist;
+struct intlist;
+
+struct symbol_conf {
+ unsigned short priv_size;
+ bool try_vmlinux_path,
+ init_annotation,
+ force,
+ ignore_vmlinux,
+ ignore_vmlinux_buildid,
+ show_kernel_path,
+ use_modules,
+ allow_aliases,
+ sort_by_name,
+ show_nr_samples,
+ show_total_period,
+ use_callchain,
+ cumulate_callchain,
+ show_branchflag_count,
+ exclude_other,
+ show_cpu_utilization,
+ initialized,
+ kptr_restrict,
+ event_group,
+ demangle,
+ demangle_kernel,
+ filter_relative,
+ show_hist_headers,
+ branch_callstack,
+ has_filter,
+ show_ref_callgraph,
+ hide_unresolved,
+ raw_trace,
+ report_hierarchy,
+ inline_name;
+ const char *vmlinux_name,
+ *kallsyms_name,
+ *source_prefix,
+ *field_sep,
+ *graph_function;
+ const char *default_guest_vmlinux_name,
+ *default_guest_kallsyms,
+ *default_guest_modules;
+ const char *guestmount;
+ const char *dso_list_str,
+ *comm_list_str,
+ *pid_list_str,
+ *tid_list_str,
+ *sym_list_str,
+ *col_width_list_str,
+ *bt_stop_list_str;
+ struct strlist *dso_list,
+ *comm_list,
+ *sym_list,
+ *dso_from_list,
+ *dso_to_list,
+ *sym_from_list,
+ *sym_to_list,
+ *bt_stop_list;
+ struct intlist *pid_list,
+ *tid_list;
+ const char *symfs;
+};
+
+extern struct symbol_conf symbol_conf;
+
+#endif // __PERF_SYMBOL_CONF
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index ed0205cc7942..02e89b02c2ce 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -3,6 +3,7 @@
#include <inttypes.h>
#include <stdio.h>
+#include "map.h"
#include "symbol.h"
size_t symbol__fprintf(struct symbol *sym, FILE *fp)
@@ -64,7 +65,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso,
struct rb_node *nd;
struct symbol_name_rb_node *pos;
- for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) {
+ for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
fprintf(fp, "%s\n", pos->sym.name);
}
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index c091635bf7dc..a8b45168513c 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -15,10 +15,12 @@
#include <linux/rbtree.h>
#include <linux/list.h>
+#include <linux/log2.h>
#include <errno.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
+#include "env.h"
#include "util.h"
#include "debug.h"
#include "symbol.h"
@@ -28,6 +30,19 @@
#define STACK_GROWTH 2048
+/*
+ * State of retpoline detection.
+ *
+ * RETPOLINE_NONE: no retpoline detection
+ * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
+ * X86_RETPOLINE_DETECTED: x86 retpoline detected
+ */
+enum retpoline_state_t {
+ RETPOLINE_NONE,
+ X86_RETPOLINE_POSSIBLE,
+ X86_RETPOLINE_DETECTED,
+};
+
/**
* struct thread_stack_entry - thread stack entry.
* @ret_addr: return address
@@ -37,6 +52,7 @@
* @cp: call path
* @no_call: a 'call' was not seen
* @trace_end: a 'call' but trace ended
+ * @non_call: a branch but not a 'call' to the start of a different symbol
*/
struct thread_stack_entry {
u64 ret_addr;
@@ -46,6 +62,7 @@ struct thread_stack_entry {
struct call_path *cp;
bool no_call;
bool trace_end;
+ bool non_call;
};
/**
@@ -60,6 +77,8 @@ struct thread_stack_entry {
* @last_time: last timestamp
* @crp: call/return processor
* @comm: current comm
+ * @arr_sz: size of array if this is the first element of an array
+ * @rstate: used to detect retpolines
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -71,8 +90,20 @@ struct thread_stack {
u64 last_time;
struct call_return_processor *crp;
struct comm *comm;
+ unsigned int arr_sz;
+ enum retpoline_state_t rstate;
};
+/*
+ * Assume pid == tid == 0 identifies the idle task as defined by
+ * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
+ * and therefore requires a stack for each cpu.
+ */
+static inline bool thread_stack__per_cpu(struct thread *thread)
+{
+ return !(thread->tid || thread->pid_);
+}
+
static int thread_stack__grow(struct thread_stack *ts)
{
struct thread_stack_entry *new_stack;
@@ -91,29 +122,93 @@ static int thread_stack__grow(struct thread_stack *ts)
return 0;
}
-static struct thread_stack *thread_stack__new(struct thread *thread,
+static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
+ struct call_return_processor *crp)
+{
+ int err;
+
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+
+ if (thread->mg && thread->mg->machine) {
+ struct machine *machine = thread->mg->machine;
+ const char *arch = perf_env__arch(machine->env);
+
+ ts->kernel_start = machine__kernel_start(machine);
+ if (!strcmp(arch, "x86"))
+ ts->rstate = X86_RETPOLINE_POSSIBLE;
+ } else {
+ ts->kernel_start = 1ULL << 63;
+ }
+ ts->crp = crp;
+
+ return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
struct call_return_processor *crp)
{
- struct thread_stack *ts;
+ struct thread_stack *ts = thread->ts, *new_ts;
+ unsigned int old_sz = ts ? ts->arr_sz : 0;
+ unsigned int new_sz = 1;
+
+ if (thread_stack__per_cpu(thread) && cpu > 0)
+ new_sz = roundup_pow_of_two(cpu + 1);
+
+ if (!ts || new_sz > old_sz) {
+ new_ts = calloc(new_sz, sizeof(*ts));
+ if (!new_ts)
+ return NULL;
+ if (ts)
+ memcpy(new_ts, ts, old_sz * sizeof(*ts));
+ new_ts->arr_sz = new_sz;
+ zfree(&thread->ts);
+ thread->ts = new_ts;
+ ts = new_ts;
+ }
- ts = zalloc(sizeof(struct thread_stack));
- if (!ts)
+ if (thread_stack__per_cpu(thread) && cpu > 0 &&
+ (unsigned int)cpu < ts->arr_sz)
+ ts += cpu;
+
+ if (!ts->stack &&
+ thread_stack__init(ts, thread, crp))
return NULL;
- if (thread_stack__grow(ts)) {
- free(ts);
+ return ts;
+}
+
+static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
+{
+ struct thread_stack *ts = thread->ts;
+
+ if (cpu < 0)
+ cpu = 0;
+
+ if (!ts || (unsigned int)cpu >= ts->arr_sz)
return NULL;
- }
- if (thread->mg && thread->mg->machine)
- ts->kernel_start = machine__kernel_start(thread->mg->machine);
- else
- ts->kernel_start = 1ULL << 63;
- ts->crp = crp;
+ ts += cpu;
+
+ if (!ts->stack)
+ return NULL;
return ts;
}
+static inline struct thread_stack *thread__stack(struct thread *thread,
+ int cpu)
+{
+ if (!thread)
+ return NULL;
+
+ if (thread_stack__per_cpu(thread))
+ return thread__cpu_stack(thread, cpu);
+
+ return thread->ts;
+}
+
static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
bool trace_end)
{
@@ -197,6 +292,8 @@ static int thread_stack__call_return(struct thread *thread,
cr.flags |= CALL_RETURN_NO_CALL;
if (no_return)
cr.flags |= CALL_RETURN_NO_RETURN;
+ if (tse->non_call)
+ cr.flags |= CALL_RETURN_NON_CALL;
return crp->process(&cr, crp->data);
}
@@ -226,25 +323,37 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
int thread_stack__flush(struct thread *thread)
{
- if (thread->ts)
- return __thread_stack__flush(thread, thread->ts);
+ struct thread_stack *ts = thread->ts;
+ unsigned int pos;
+ int err = 0;
- return 0;
+ if (ts) {
+ for (pos = 0; pos < ts->arr_sz; pos++) {
+ int ret = __thread_stack__flush(thread, ts + pos);
+
+ if (ret)
+ err = ret;
+ }
+ }
+
+ return err;
}
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr)
{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
if (!thread)
return -EINVAL;
- if (!thread->ts) {
- thread->ts = thread_stack__new(thread, NULL);
- if (!thread->ts) {
+ if (!ts) {
+ ts = thread_stack__new(thread, cpu, NULL);
+ if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
- thread->ts->trace_nr = trace_nr;
+ ts->trace_nr = trace_nr;
}
/*
@@ -252,14 +361,14 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* the stack might be completely invalid. Better to report nothing than
* to report something misleading, so flush the stack.
*/
- if (trace_nr != thread->ts->trace_nr) {
- if (thread->ts->trace_nr)
- __thread_stack__flush(thread, thread->ts);
- thread->ts->trace_nr = trace_nr;
+ if (trace_nr != ts->trace_nr) {
+ if (ts->trace_nr)
+ __thread_stack__flush(thread, ts);
+ ts->trace_nr = trace_nr;
}
/* Stop here if thread_stack__process() is in use */
- if (thread->ts->crp)
+ if (ts->crp)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@@ -270,7 +379,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
ret_addr = from_ip + insn_len;
if (ret_addr == to_ip)
return 0; /* Zero-length calls are excluded */
- return thread_stack__push(thread->ts, ret_addr,
+ return thread_stack__push(ts, ret_addr,
flags & PERF_IP_FLAG_TRACE_END);
} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
/*
@@ -280,50 +389,98 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
* address, so try to pop that. Also, do not expect a call made
* when the trace ended, to return, so pop that.
*/
- thread_stack__pop(thread->ts, to_ip);
- thread_stack__pop_trace_end(thread->ts);
+ thread_stack__pop(ts, to_ip);
+ thread_stack__pop_trace_end(ts);
} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
- thread_stack__pop(thread->ts, to_ip);
+ thread_stack__pop(ts, to_ip);
}
return 0;
}
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
- if (!thread || !thread->ts)
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!ts)
return;
- if (trace_nr != thread->ts->trace_nr) {
- if (thread->ts->trace_nr)
- __thread_stack__flush(thread, thread->ts);
- thread->ts->trace_nr = trace_nr;
+ if (trace_nr != ts->trace_nr) {
+ if (ts->trace_nr)
+ __thread_stack__flush(thread, ts);
+ ts->trace_nr = trace_nr;
}
}
+static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
+{
+ __thread_stack__flush(thread, ts);
+ zfree(&ts->stack);
+}
+
+static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
+{
+ unsigned int arr_sz = ts->arr_sz;
+
+ __thread_stack__free(thread, ts);
+ memset(ts, 0, sizeof(*ts));
+ ts->arr_sz = arr_sz;
+}
+
void thread_stack__free(struct thread *thread)
{
- if (thread->ts) {
- __thread_stack__flush(thread, thread->ts);
- zfree(&thread->ts->stack);
+ struct thread_stack *ts = thread->ts;
+ unsigned int pos;
+
+ if (ts) {
+ for (pos = 0; pos < ts->arr_sz; pos++)
+ __thread_stack__free(thread, ts + pos);
zfree(&thread->ts);
}
}
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
- size_t sz, u64 ip)
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
- size_t i;
+ return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
- if (!thread || !thread->ts)
- chain->nr = 1;
- else
- chain->nr = min(sz, thread->ts->cnt + 1);
+void thread_stack__sample(struct thread *thread, int cpu,
+ struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 context = callchain_context(ip, kernel_start);
+ u64 last_context;
+ size_t i, j;
- chain->ips[0] = ip;
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ chain->ips[0] = context;
+ chain->ips[1] = ip;
+
+ if (!ts) {
+ chain->nr = 2;
+ return;
+ }
+
+ last_context = context;
+
+ for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (i >= sz - 1)
+ break;
+ chain->ips[i++] = context;
+ last_context = context;
+ }
+ chain->ips[i] = ip;
+ }
- for (i = 1; i < chain->nr; i++)
- chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr;
+ chain->nr = i;
}
struct call_return_processor *
@@ -362,6 +519,9 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
struct thread_stack_entry *tse;
int err;
+ if (!cp)
+ return -ENOMEM;
+
if (ts->cnt == ts->sz) {
err = thread_stack__grow(ts);
if (err)
@@ -376,6 +536,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
tse->cp = cp;
tse->no_call = no_call;
tse->trace_end = trace_end;
+ tse->non_call = false;
return 0;
}
@@ -397,14 +558,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
timestamp, ref, false);
}
- if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
+ if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
+ !ts->stack[ts->cnt - 1].non_call) {
return thread_stack__call_return(thread, ts, --ts->cnt,
timestamp, ref, false);
} else {
size_t i = ts->cnt - 1;
while (i--) {
- if (ts->stack[i].ret_addr != ret_addr)
+ if (ts->stack[i].ret_addr != ret_addr ||
+ ts->stack[i].non_call)
continue;
i += 1;
while (ts->cnt > i) {
@@ -423,7 +586,7 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
return 1;
}
-static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+static int thread_stack__bottom(struct thread_stack *ts,
struct perf_sample *sample,
struct addr_location *from_al,
struct addr_location *to_al, u64 ref)
@@ -445,10 +608,8 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
ts->kernel_start);
- if (!cp)
- return -ENOMEM;
- return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+ return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
true, false);
}
@@ -459,36 +620,36 @@ static int thread_stack__no_call_return(struct thread *thread,
struct addr_location *to_al, u64 ref)
{
struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *root = &cpr->call_path;
+ struct symbol *fsym = from_al->sym;
+ struct symbol *tsym = to_al->sym;
struct call_path *cp, *parent;
u64 ks = ts->kernel_start;
+ u64 addr = sample->addr;
+ u64 tm = sample->time;
+ u64 ip = sample->ip;
int err;
- if (sample->ip >= ks && sample->addr < ks) {
+ if (ip >= ks && addr < ks) {
/* Return to userspace, so pop all kernel addresses */
while (thread_stack__in_kernel(ts)) {
err = thread_stack__call_return(thread, ts, --ts->cnt,
- sample->time, ref,
- true);
+ tm, ref, true);
if (err)
return err;
}
/* If the stack is empty, push the userspace address */
if (!ts->cnt) {
- cp = call_path__findnew(cpr, &cpr->call_path,
- to_al->sym, sample->addr,
- ts->kernel_start);
- if (!cp)
- return -ENOMEM;
- return thread_stack__push_cp(ts, 0, sample->time, ref,
- cp, true, false);
+ cp = call_path__findnew(cpr, root, tsym, addr, ks);
+ return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
+ false);
}
- } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
+ } else if (thread_stack__in_kernel(ts) && ip < ks) {
/* Return to userspace, so pop all kernel addresses */
while (thread_stack__in_kernel(ts)) {
err = thread_stack__call_return(thread, ts, --ts->cnt,
- sample->time, ref,
- true);
+ tm, ref, true);
if (err)
return err;
}
@@ -497,21 +658,59 @@ static int thread_stack__no_call_return(struct thread *thread,
if (ts->cnt)
parent = ts->stack[ts->cnt - 1].cp;
else
- parent = &cpr->call_path;
+ parent = root;
- /* This 'return' had no 'call', so push and pop top of stack */
- cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
- ts->kernel_start);
- if (!cp)
- return -ENOMEM;
+ if (parent->sym == from_al->sym) {
+ /*
+ * At the bottom of the stack, assume the missing 'call' was
+ * before the trace started. So, pop the current symbol and push
+ * the 'to' symbol.
+ */
+ if (ts->cnt == 1) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ tm, ref, false);
+ if (err)
+ return err;
+ }
+
+ if (!ts->cnt) {
+ cp = call_path__findnew(cpr, root, tsym, addr, ks);
+
+ return thread_stack__push_cp(ts, addr, tm, ref, cp,
+ true, false);
+ }
+
+ /*
+ * Otherwise assume the 'return' is being used as a jump (e.g.
+ * retpoline) and just push the 'to' symbol.
+ */
+ cp = call_path__findnew(cpr, parent, tsym, addr, ks);
- err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
- true, false);
+ err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
+ if (!err)
+ ts->stack[ts->cnt - 1].non_call = true;
+
+ return err;
+ }
+
+ /*
+ * Assume 'parent' has not yet returned, so push 'to', and then push and
+ * pop 'from'.
+ */
+
+ cp = call_path__findnew(cpr, parent, tsym, addr, ks);
+
+ err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
if (err)
return err;
- return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
- to_al->sym);
+ cp = call_path__findnew(cpr, cp, fsym, ip, ks);
+
+ err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
+ if (err)
+ return err;
+
+ return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}
static int thread_stack__trace_begin(struct thread *thread,
@@ -549,8 +748,6 @@ static int thread_stack__trace_end(struct thread_stack *ts,
cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
ts->kernel_start);
- if (!cp)
- return -ENOMEM;
ret_addr = sample->ip + sample->insn_len;
@@ -558,33 +755,97 @@ static int thread_stack__trace_end(struct thread_stack *ts,
false, true);
}
+static bool is_x86_retpoline(const char *name)
+{
+ const char *p = strstr(name, "__x86_indirect_thunk_");
+
+ return p == name || !strcmp(name, "__indirect_thunk_start");
+}
+
+/*
+ * x86 retpoline functions pollute the call graph. This function removes them.
+ * This does not handle function return thunks, nor is there any improvement
+ * for the handling of inline thunks or extern thunks.
+ */
+static int thread_stack__x86_retpoline(struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *to_al)
+{
+ struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct symbol *sym = tse->cp->sym;
+ struct symbol *tsym = to_al->sym;
+ struct call_path *cp;
+
+ if (sym && is_x86_retpoline(sym->name)) {
+ /*
+ * This is a x86 retpoline fn. It pollutes the call graph by
+ * showing up everywhere there is an indirect branch, but does
+ * not itself mean anything. Here the top-of-stack is removed,
+ * by decrementing the stack count, and then further down, the
+ * resulting top-of-stack is replaced with the actual target.
+ * The result is that the retpoline functions will no longer
+ * appear in the call graph. Note this only affects the call
+ * graph, since all the original branches are left unchanged.
+ */
+ ts->cnt -= 1;
+ sym = ts->stack[ts->cnt - 2].cp->sym;
+ if (sym && sym == tsym && to_al->addr != tsym->start) {
+ /*
+ * Target is back to the middle of the symbol we came
+ * from so assume it is an indirect jmp and forget it
+ * altogether.
+ */
+ ts->cnt -= 1;
+ return 0;
+ }
+ } else if (sym && sym == tsym) {
+ /*
+ * Target is back to the symbol we came from so assume it is an
+ * indirect jmp and forget it altogether.
+ */
+ ts->cnt -= 1;
+ return 0;
+ }
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
+ sample->addr, ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+
+ /* Replace the top-of-stack with the actual target */
+ ts->stack[ts->cnt - 1].cp = cp;
+
+ return 0;
+}
+
int thread_stack__process(struct thread *thread, struct comm *comm,
struct perf_sample *sample,
struct addr_location *from_al,
struct addr_location *to_al, u64 ref,
struct call_return_processor *crp)
{
- struct thread_stack *ts = thread->ts;
+ struct thread_stack *ts = thread__stack(thread, sample->cpu);
+ enum retpoline_state_t rstate;
int err = 0;
- if (ts) {
- if (!ts->crp) {
- /* Supersede thread_stack__event() */
- thread_stack__free(thread);
- thread->ts = thread_stack__new(thread, crp);
- if (!thread->ts)
- return -ENOMEM;
- ts = thread->ts;
- ts->comm = comm;
- }
- } else {
- thread->ts = thread_stack__new(thread, crp);
- if (!thread->ts)
+ if (ts && !ts->crp) {
+ /* Supersede thread_stack__event() */
+ thread_stack__reset(thread, ts);
+ ts = NULL;
+ }
+
+ if (!ts) {
+ ts = thread_stack__new(thread, sample->cpu, crp);
+ if (!ts)
return -ENOMEM;
- ts = thread->ts;
ts->comm = comm;
}
+ rstate = ts->rstate;
+ if (rstate == X86_RETPOLINE_DETECTED)
+ ts->rstate = X86_RETPOLINE_POSSIBLE;
+
/* Flush stack on exec */
if (ts->comm != comm && thread->pid_ == thread->tid) {
err = __thread_stack__flush(thread, ts);
@@ -595,8 +856,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
/* If the stack is empty, put the current symbol on the stack */
if (!ts->cnt) {
- err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
- ref);
+ err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
if (err)
return err;
}
@@ -620,14 +880,27 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
to_al->sym, sample->addr,
ts->kernel_start);
- if (!cp)
- return -ENOMEM;
err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
cp, false, trace_end);
+
+ /*
+ * A call to the same symbol but not the start of the symbol,
+ * may be the start of a x86 retpoline.
+ */
+ if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
+ from_al->sym == to_al->sym &&
+ to_al->addr != to_al->sym->start)
+ ts->rstate = X86_RETPOLINE_DETECTED;
+
} else if (sample->flags & PERF_IP_FLAG_RETURN) {
if (!sample->ip || !sample->addr)
return 0;
+ /* x86 retpoline 'return' doesn't match the stack */
+ if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
+ ts->stack[ts->cnt - 1].ret_addr != sample->addr)
+ return thread_stack__x86_retpoline(ts, sample, to_al);
+
err = thread_stack__pop_cp(thread, ts, sample->addr,
sample->time, ref, from_al->sym);
if (err) {
@@ -640,14 +913,35 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
err = thread_stack__trace_begin(thread, ts, sample->time, ref);
} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
err = thread_stack__trace_end(ts, sample, ref);
+ } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
+ from_al->sym != to_al->sym && to_al->sym &&
+ to_al->addr == to_al->sym->start) {
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+
+ /*
+ * The compiler might optimize a call/ret combination by making
+ * it a jmp. Make that visible by recording on the stack a
+ * branch to the start of a different symbol. Note, that means
+ * when a ret pops the stack, all jmps must be popped off first.
+ */
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+ to_al->sym, sample->addr,
+ ts->kernel_start);
+ err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
+ false);
+ if (!err)
+ ts->stack[ts->cnt - 1].non_call = true;
}
return err;
}
-size_t thread_stack__depth(struct thread *thread)
+size_t thread_stack__depth(struct thread *thread, int cpu)
{
- if (!thread->ts)
+ struct thread_stack *ts = thread__stack(thread, cpu);
+
+ if (!ts)
return 0;
- return thread->ts->cnt;
+ return ts->cnt;
}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b7e41c4ebfdd..b7c04e19ad41 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -35,10 +35,13 @@ struct call_path;
*
* CALL_RETURN_NO_CALL: 'return' but no matching 'call'
* CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
+ * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
+ * symbol
*/
enum {
CALL_RETURN_NO_CALL = 1 << 0,
CALL_RETURN_NO_RETURN = 1 << 1,
+ CALL_RETURN_NON_CALL = 1 << 2,
};
/**
@@ -80,14 +83,14 @@ struct call_return_processor {
void *data;
};
-int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
u64 to_ip, u16 insn_len, u64 trace_nr);
-void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
-void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
- size_t sz, u64 ip);
+void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
-size_t thread_stack__depth(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread, int cpu);
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, void *data),
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 2048d393ece6..4c179fef442d 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -12,6 +12,7 @@
#include "debug.h"
#include "namespaces.h"
#include "comm.h"
+#include "symbol.h"
#include "unwind.h"
#include <api/fs/fs.h>
@@ -64,6 +65,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
RB_CLEAR_NODE(&thread->rb_node);
/* Thread holds first ref to nsdata. */
thread->nsinfo = nsinfo__new(pid);
+ srccode_state_init(&thread->srccode_state);
}
return thread;
@@ -103,6 +105,7 @@ void thread__delete(struct thread *thread)
unwind__finish_access(thread);
nsinfo__zput(thread->nsinfo);
+ srccode_state_free(&thread->srccode_state);
exit_rwsem(&thread->namespaces_lock);
exit_rwsem(&thread->comm_lock);
@@ -330,7 +333,8 @@ static int thread__prepare_access(struct thread *thread)
}
static int thread__clone_map_groups(struct thread *thread,
- struct thread *parent)
+ struct thread *parent,
+ bool do_maps_clone)
{
/* This is new thread, we share map groups for process. */
if (thread->pid_ == parent->pid_)
@@ -341,15 +345,11 @@ static int thread__clone_map_groups(struct thread *thread,
thread->pid_, thread->tid, parent->pid_, parent->tid);
return 0;
}
-
/* But this one is new process, copy maps. */
- if (map_groups__clone(thread, parent->mg) < 0)
- return -ENOMEM;
-
- return 0;
+ return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0;
}
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone)
{
if (parent->comm_set) {
const char *comm = thread__comm_str(parent);
@@ -362,7 +362,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
}
thread->ppid = parent->tid;
- return thread__clone_map_groups(thread, parent);
+ return thread__clone_map_groups(thread, parent, do_maps_clone);
}
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 07606aa6998d..8276ffeec556 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -5,13 +5,18 @@
#include <linux/refcount.h>
#include <linux/rbtree.h>
#include <linux/list.h>
+#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
-#include "symbol.h"
+#include "srccode.h"
+#include "symbol_conf.h"
#include <strlist.h>
#include <intlist.h>
#include "rwsem.h"
+struct addr_location;
+struct map;
+struct namespaces_event;
struct thread_stack;
struct unwind_libunwind_ops;
@@ -38,10 +43,13 @@ struct thread {
void *priv;
struct thread_stack *ts;
struct nsinfo *nsinfo;
+ struct srccode_state srccode_state;
#ifdef HAVE_LIBUNWIND_SUPPORT
void *addr_space;
struct unwind_libunwind_ops *unwind_libunwind_ops;
#endif
+ bool filter;
+ int filter_entry_depth;
};
struct machine;
@@ -87,16 +95,20 @@ struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
const char *thread__comm_str(const struct thread *thread);
int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
size_t thread__fprintf(struct thread *thread, FILE *fp);
struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
struct addr_location *al);
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al);
struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
u64 addr, struct addr_location *al);
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al);
void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
struct addr_location *al);
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 56e4ca54020a..250391672f9f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -53,7 +53,10 @@ struct perf_tool {
itrace_start,
context_switch,
throttle,
- unthrottle;
+ unthrottle,
+ ksymbol,
+ bpf_event;
+
event_attr_op attr;
event_attr_op event_update;
event_op2 tracing_data;
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 8e517def925b..4c8da8c4435f 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -46,8 +46,9 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
samples_per_sec;
ret = SNPRINTF(bf, size,
" PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
- " exact: %4.1f%% [", samples_per_sec,
- ksamples_percent, esamples_percent);
+ " exact: %4.1f%% lost: %" PRIu64 "/%" PRIu64 " drop: %" PRIu64 "/%" PRIu64 " [",
+ samples_per_sec, ksamples_percent, esamples_percent,
+ top->lost, top->lost_total, top->drop, top->drop_total);
} else {
float us_samples_per_sec = top->us_samples / top->delay_secs;
float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
@@ -106,6 +107,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
top->evlist->cpus->nr > 1 ? "s" : "");
}
+ perf_top__reset_sample_counters(top);
return ret;
}
@@ -113,5 +115,5 @@ void perf_top__reset_sample_counters(struct perf_top *top)
{
top->samples = top->us_samples = top->kernel_samples =
top->exact_samples = top->guest_kernel_samples =
- top->guest_us_samples = 0;
+ top->guest_us_samples = top->lost = top->drop = 0;
}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index 9add1f72ce95..19f95eaf75c8 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -22,7 +22,7 @@ struct perf_top {
* Symbols will be added here in perf_event__process_sample and will
* get out after decayed.
*/
- u64 samples;
+ u64 samples, lost, lost_total, drop, drop_total;
u64 kernel_samples, us_samples;
u64 exact_samples;
u64 guest_us_samples, guest_kernel_samples;
@@ -40,6 +40,14 @@ struct perf_top {
const char *sym_filter;
float min_percent;
unsigned int nr_threads_synthesize;
+
+ struct {
+ struct ordered_events *in;
+ struct ordered_events data[2];
+ bool rotate;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ } qe;
};
#define CONSOLE_CLEAR ""
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 32e558a65af3..ad74be1f0e42 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -33,7 +33,7 @@ static int get_common_field(struct scripting_context *context,
int *offset, int *size, const char *type)
{
struct tep_handle *pevent = context->pevent;
- struct tep_event_format *event;
+ struct tep_event *event;
struct tep_format_field *field;
if (!*size) {
@@ -95,7 +95,7 @@ int common_pc(struct scripting_context *context)
}
unsigned long long
-raw_field_value(struct tep_event_format *event, const char *name, void *data)
+raw_field_value(struct tep_event *event, const char *name, void *data)
{
struct tep_format_field *field;
unsigned long long val;
@@ -109,12 +109,12 @@ raw_field_value(struct tep_event_format *event, const char *name, void *data)
return val;
}
-unsigned long long read_size(struct tep_event_format *event, void *ptr, int size)
+unsigned long long read_size(struct tep_event *event, void *ptr, int size)
{
return tep_read_number(event->pevent, ptr, size);
}
-void event_format__fprintf(struct tep_event_format *event,
+void event_format__fprintf(struct tep_event *event,
int cpu, void *data, int size, FILE *fp)
{
struct tep_record record;
@@ -131,7 +131,7 @@ void event_format__fprintf(struct tep_event_format *event,
trace_seq_destroy(&s);
}
-void event_format__print(struct tep_event_format *event,
+void event_format__print(struct tep_event *event,
int cpu, void *data, int size)
{
return event_format__fprintf(event, cpu, data, size, stdout);
@@ -190,12 +190,12 @@ int parse_event_file(struct tep_handle *pevent,
return tep_parse_event(pevent, buf, size, sys);
}
-struct tep_event_format *trace_find_next_event(struct tep_handle *pevent,
- struct tep_event_format *event)
+struct tep_event *trace_find_next_event(struct tep_handle *pevent,
+ struct tep_event *event)
{
static int idx;
int events_count;
- struct tep_event_format *all_events;
+ struct tep_event *all_events;
all_events = tep_get_first_event(pevent);
events_count = tep_get_events_count(pevent);
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 76f12c705ef9..efe2f58cff4e 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -102,7 +102,7 @@ static unsigned int read4(struct tep_handle *pevent)
if (do_read(&data, 4) < 0)
return 0;
- return __tep_data2host4(pevent, data);
+ return tep_read_number(pevent, &data, 4);
}
static unsigned long long read8(struct tep_handle *pevent)
@@ -111,7 +111,7 @@ static unsigned long long read8(struct tep_handle *pevent)
if (do_read(&data, 8) < 0)
return 0;
- return __tep_data2host8(pevent, data);
+ return tep_read_number(pevent, &data, 8);
}
static char *read_string(void)
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index 95664b2f771e..cbe0dd758e3a 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -72,12 +72,12 @@ void trace_event__cleanup(struct trace_event *t)
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-static struct tep_event_format*
+static struct tep_event*
tp_format(const char *sys, const char *name)
{
char *tp_dir = get_events_file(sys);
struct tep_handle *pevent = tevent.pevent;
- struct tep_event_format *event = NULL;
+ struct tep_event *event = NULL;
char path[PATH_MAX];
size_t size;
char *data;
@@ -102,7 +102,7 @@ tp_format(const char *sys, const char *name)
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-struct tep_event_format*
+struct tep_event*
trace_event__tp_format(const char *sys, const char *name)
{
if (!tevent_initialized && trace_event__init2())
@@ -111,7 +111,7 @@ trace_event__tp_format(const char *sys, const char *name)
return tp_format(sys, name);
}
-struct tep_event_format *trace_event__tp_format_id(int id)
+struct tep_event *trace_event__tp_format_id(int id)
{
if (!tevent_initialized && trace_event__init2())
return ERR_PTR(-ENOMEM);
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index f024d73bfc40..d9b0a942090a 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -22,17 +22,17 @@ int trace_event__init(struct trace_event *t);
void trace_event__cleanup(struct trace_event *t);
int trace_event__register_resolver(struct machine *machine,
tep_func_resolver_t *func);
-struct tep_event_format*
+struct tep_event*
trace_event__tp_format(const char *sys, const char *name);
-struct tep_event_format *trace_event__tp_format_id(int id);
+struct tep_event *trace_event__tp_format_id(int id);
int bigendian(void);
-void event_format__fprintf(struct tep_event_format *event,
+void event_format__fprintf(struct tep_event *event,
int cpu, void *data, int size, FILE *fp);
-void event_format__print(struct tep_event_format *event,
+void event_format__print(struct tep_event *event,
int cpu, void *data, int size);
int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
@@ -40,7 +40,7 @@ int parse_event_file(struct tep_handle *pevent,
char *buf, unsigned long size, char *sys);
unsigned long long
-raw_field_value(struct tep_event_format *event, const char *name, void *data);
+raw_field_value(struct tep_event *event, const char *name, void *data);
void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
@@ -48,9 +48,9 @@ void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int siz
ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
-struct tep_event_format *trace_find_next_event(struct tep_handle *pevent,
- struct tep_event_format *event);
-unsigned long long read_size(struct tep_event_format *event, void *ptr, int size);
+struct tep_event *trace_find_next_event(struct tep_handle *pevent,
+ struct tep_event *event);
+unsigned long long read_size(struct tep_event *event, void *ptr, int size);
unsigned long long eval_flag(const char *flag);
int read_tracing_data(int fd, struct list_head *pattrs);
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 6f318b15950e..407d0167b942 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -8,6 +8,8 @@
#include "unwind.h"
#include "unwind-libdw.h"
#include "machine.h"
+#include "map.h"
+#include "symbol.h"
#include "thread.h"
#include <linux/types.h>
#include "event.h"
@@ -45,13 +47,13 @@ static int __report_module(struct addr_location *al, u64 ip,
Dwarf_Addr s;
dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
- if (s != al->map->start)
+ if (s != al->map->start - al->map->pgoff)
mod = 0;
}
if (!mod)
mod = dwfl_report_elf(ui->dwfl, dso->short_name,
- (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start,
+ (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff,
false);
return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 79f521a552cf..f3c666a84e4d 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -34,6 +34,7 @@
#include "session.h"
#include "perf_regs.h"
#include "unwind.h"
+#include "map.h"
#include "symbol.h"
#include "util.h"
#include "debug.h"
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index b029a5e9ae49..9778b3133b77 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include "unwind.h"
+#include "map.h"
#include "thread.h"
#include "session.h"
#include "debug.h"
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 093352e93d50..d388f80d8703 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -2,6 +2,7 @@
#include "../perf.h"
#include "util.h"
#include "debug.h"
+#include "namespaces.h"
#include <api/fs/fs.h>
#include <sys/mman.h>
#include <sys/stat.h>
@@ -20,6 +21,7 @@
#include <linux/time64.h>
#include <unistd.h>
#include "strlist.h"
+#include "string2.h"
/*
* XXX We need to find a better place for these things...
@@ -116,23 +118,67 @@ int mkdir_p(char *path, mode_t mode)
return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
}
-int rm_rf(const char *path)
+static bool match_pat(char *file, const char **pat)
+{
+	const char **p;
+
+	/* No pattern list at all means every name is accepted. */
+	if (!pat)
+		return true;
+
+	/* Walk the NULL-terminated list; the first glob hit wins. */
+	for (p = pat; *p; p++) {
+		if (strglobmatch(file, *p))
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * The depth specifies how deep the removal will go.
+ * 0 - will remove only files under the 'path' directory
+ * 1 .. x - will dive in x-level deep under the 'path' directory
+ *
+ * If specified, pat is a NULL-terminated array of string patterns
+ * which are checked against every file/directory found. Only matching
+ * ones are removed.
+ *
+ * The function returns:
+ * 0 on success
+ * -1 on removal failure with errno set
+ * -2 on pattern failure
+ */
+static int rm_rf_depth_pat(const char *path, int depth, const char **pat)
{
DIR *dir;
- int ret = 0;
+ int ret;
struct dirent *d;
char namebuf[PATH_MAX];
+ struct stat statbuf;
+ /* Do not fail if there's no file. */
+ ret = lstat(path, &statbuf);
+ if (ret)
+ return 0;
+
+ /* Try to remove any file we get. */
+ if (!(statbuf.st_mode & S_IFDIR))
+ return unlink(path);
+
+ /* We have directory in path. */
dir = opendir(path);
if (dir == NULL)
- return 0;
+ return -1;
while ((d = readdir(dir)) != NULL && !ret) {
- struct stat statbuf;
if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
continue;
+ if (!match_pat(d->d_name, pat))
+ return -2;
+
scnprintf(namebuf, sizeof(namebuf), "%s/%s",
path, d->d_name);
@@ -144,7 +190,7 @@ int rm_rf(const char *path)
}
if (S_ISDIR(statbuf.st_mode))
- ret = rm_rf(namebuf);
+ ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0;
else
ret = unlink(namebuf);
}
@@ -156,6 +202,22 @@ int rm_rf(const char *path)
return rmdir(path);
}
+/* Remove 'path' via rm_rf_depth_pat(), limited to depth 0 and the names below. */
+int rm_rf_perf_data(const char *path)
+{
+	/* NULL-terminated glob list matched against each directory entry. */
+	const char *perf_data_pat[] = { "header", "data.*", NULL };
+	/* Depth 0: sub-directories are not descended into. */
+	const int depth = 0;
+
+	return rm_rf_depth_pat(path, depth, perf_data_pat);
+}
+
+int rm_rf(const char *path)
+{
+ return rm_rf_depth_pat(path, INT_MAX, NULL); /* remove 'path': depth bounded only by INT_MAX, NULL pat so match_pat() accepts every name */
+}
+
/* A filter which removes dot files */
bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
{
@@ -506,3 +568,21 @@ out:
return tip;
}
+
+/*
+ * Store the path of the running executable in 'buf' ('len' bytes) and
+ * return 'buf'.  The path is read from the /proc/self/exe symlink; when
+ * that fails the string "perf" is stored instead, so 'len' must be at
+ * least 5.
+ */
+char *perf_exe(char *buf, int len)
+{
+	/* readlink() does not NUL-terminate: keep one byte for the terminator. */
+	int n = len > 1 ? readlink("/proc/self/exe", buf, len - 1) : -1;
+
+	if (n > 0) {
+		buf[n] = 0;
+		return buf;
+	}
+	return strcpy(buf, "perf");
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 14508ee7707a..09c1b0f91f65 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -31,6 +31,7 @@ struct strlist;
int mkdir_p(char *path, mode_t mode);
int rm_rf(const char *path);
+int rm_rf_perf_data(const char *path);
struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
bool lsdir_no_dot_filter(const char *name, struct dirent *d);
int copyfile(const char *from, const char *to);
@@ -59,6 +60,10 @@ int fetch_kernel_version(unsigned int *puint,
const char *perf_tip(const char *dirpath);
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif
+
#ifndef HAVE_SCHED_GETCPU_SUPPORT
int sched_getcpu(void);
#endif
@@ -72,6 +77,8 @@ extern bool perf_singlethreaded;
void perf_set_singlethreaded(void);
void perf_set_multithreaded(void);
+char *perf_exe(char *buf, int len);
+
#ifndef O_CLOEXEC
#ifdef __sparc__
#define O_CLOEXEC 0x400000
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 741af209b19d..5031b7b22bbd 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -11,6 +11,7 @@
#include "vdso.h"
#include "util.h"
+#include "map.h"
#include "symbol.h"
#include "machine.h"
#include "thread.h"
@@ -18,10 +19,10 @@
#include "debug.h"
/*
- * Include definition of find_vdso_map() also used in perf-read-vdso.c for
+ * Include definition of find_map() also used in perf-read-vdso.c for
* building perf-read-vdso32 and perf-read-vdsox32.
*/
-#include "find-vdso-map.c"
+#include "find-map.c"
#define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX"
@@ -76,7 +77,7 @@ static char *get_file(struct vdso_file *vdso_file)
if (vdso_file->found)
return vdso_file->temp_file_name;
- if (vdso_file->error || find_vdso_map(&start, &end))
+ if (vdso_file->error || find_map(&start, &end, VDSO__MAP_NAME))
return NULL;
size = end - start;
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 902ce6384f57..512ad7c09b13 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -6,7 +6,6 @@
#include <sys/mman.h>
#include <zlib.h>
#include <linux/compiler.h>
-#include <unistd.h>
#include "util/compress.h"
#include "util/util.h"
OpenPOWER on IntegriCloud