diff options
Diffstat (limited to 'tools/perf')
252 files changed, 18132 insertions, 4836 deletions
diff --git a/tools/perf/Build b/tools/perf/Build index 72237455b400..a43fae7f439a 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -1,5 +1,6 @@ perf-y += builtin-bench.o perf-y += builtin-annotate.o +perf-y += builtin-config.o perf-y += builtin-diff.o perf-y += builtin-evlist.o perf-y += builtin-help.o @@ -19,6 +20,7 @@ perf-y += builtin-kvm.o perf-y += builtin-inject.o perf-y += builtin-mem.o perf-y += builtin-data.o +perf-y += builtin-version.o perf-$(CONFIG_AUDIT) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o @@ -34,8 +36,13 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" CFLAGS_builtin-help.o += $(paths) CFLAGS_builtin-timechart.o += $(paths) -CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE +CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \ + -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \ + -DPREFIX="BUILD_STR($(prefix_SQ))" \ + -include $(OUTPUT)PERF-VERSION-FILE CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))" +CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" +CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" libperf-y += util/ libperf-y += arch/ diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt new file mode 100644 index 000000000000..15949e2a7805 --- /dev/null +++ b/tools/perf/Documentation/perf-config.txt @@ -0,0 +1,456 @@ +perf-config(1) +============== + +NAME +---- +perf-config - Get and set variables in a configuration file. + +SYNOPSIS +-------- +[verse] +'perf config' [<file-option>] -l | --list + +DESCRIPTION +----------- +You can manage variables in a configuration file with this command. + +OPTIONS +------- + +-l:: +--list:: + Show current config variables, name and value, for all sections. + +--user:: + For writing and reading options: write to user + '$HOME/.perfconfig' file or read it. + +--system:: + For writing and reading options: write to system-wide + '$(sysconfdir)/perfconfig' or read it. + +CONFIGURATION FILE +------------------ + +The perf configuration file contains many variables to change various +aspects of each of its tools, including output, disk usage, etc. +The '$HOME/.perfconfig' file is used to store a per-user configuration. +The file '$(sysconfdir)/perfconfig' can be used to +store a system-wide default configuration. + +When reading or writing, the values are read from the system and user +configuration files by default, and options '--system' and '--user' +can be used to tell the command to read from or write to only that location. + +Syntax +~~~~~~ + +The file consist of sections. A section starts with its name +surrounded by square brackets and continues till the next section +begins. Each variable must be in a section, and have the form +'name = value', for example: + + [section] + name1 = value1 + name2 = value2 + +Section names are case sensitive and can contain any characters except +newline (double quote `"` and backslash have to be escaped as `\"` and `\\`, +respectively). Section headers can't span multiple lines. + +Example +~~~~~~~ + +Given a $HOME/.perfconfig like this: + +# +# This is the config file, and +# a '#' and ';' character indicates a comment +# + + [colors] + # Color variables + top = red, default + medium = green, default + normal = lightgray, default + selected = white, lightgray + jump_arrows = blue, default + addr = magenta, default + root = white, blue + + [tui] + # Defaults if linked with libslang + report = on + annotate = on + top = on + + [buildid] + # Default, disable using /dev/null + dir = ~/.debug + + [annotate] + # Defaults + hide_src_code = false + use_offset = true + jump_arrows = true + show_nr_jumps = false + + [help] + # Format can be man, info, web or html + format = man + autocorrect = 0 + + [ui] + show-headers = true + + [call-graph] + # fp (framepointer), dwarf + record-mode = fp + print-type = graph + order = caller + sort-key = function + +Variables +~~~~~~~~~ + +colors.*:: + The variables for customizing the colors used in the output for the + 'report', 'top' and 'annotate' in the TUI. They should specify the + foreground and background colors, separated by a comma, for example: + + medium = green, lightgray + + If you want to use the color configured for you terminal, just leave it + as 'default', for example: + + medium = default, lightgray + + Available colors: + red, yellow, green, cyan, gray, black, blue, + white, default, magenta, lightgray + + colors.top:: + 'top' means a overhead percentage which is more than 5%. + And values of this variable specify percentage colors. + Basic key values are foreground-color 'red' and + background-color 'default'. + colors.medium:: + 'medium' means a overhead percentage which has more than 0.5%. + Default values are 'green' and 'default'. + colors.normal:: + 'normal' means the rest of overhead percentages + except 'top', 'medium', 'selected'. + Default values are 'lightgray' and 'default'. + colors.selected:: + This selects the colors for the current entry in a list of entries + from sub-commands (top, report, annotate). + Default values are 'black' and 'lightgray'. + colors.jump_arrows:: + Colors for jump arrows on assembly code listings + such as 'jns', 'jmp', 'jane', etc. + Default values are 'blue', 'default'. + colors.addr:: + This selects colors for addresses from 'annotate'. + Default values are 'magenta', 'default'. + colors.root:: + Colors for headers in the output of a sub-commands (top, report). + Default values are 'white', 'blue'. + +tui.*, gtk.*:: + Subcommands that can be configured here are 'top', 'report' and 'annotate'. + These values are booleans, for example: + + [tui] + top = true + + will make the TUI be the default for the 'top' subcommand. Those will be + available if the required libs were detected at tool build time. + +buildid.*:: + buildid.dir:: + Each executable and shared library in modern distributions comes with a + content based identifier that, if available, will be inserted in a + 'perf.data' file header to, at analysis time find what is needed to do + symbol resolution, code annotation, etc. + + The recording tools also stores a hard link or copy in a per-user + directory, $HOME/.debug/, of binaries, shared libraries, /proc/kallsyms + and /proc/kcore files to be used at analysis time. + + The buildid.dir variable can be used to either change this directory + cache location, or to disable it altogether. If you want to disable it, + set buildid.dir to /dev/null. The default is $HOME/.debug + +annotate.*:: + These options work only for TUI. + These are in control of addresses, jump function, source code + in lines of assembly code from a specific program. + + annotate.hide_src_code:: + If a program which is analyzed has source code, + this option lets 'annotate' print a list of assembly code with the source code. + For example, let's see a part of a program. There're four lines. + If this option is 'true', they can be printed + without source code from a program as below. + + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ mov (%rdi),%rdx + + But if this option is 'false', source code of the part + can be also printed as below. Default is 'false'. + + │ struct rb_node *rb_next(const struct rb_node *node) + │ { + │ push %rbp + │ mov %rsp,%rbp + │ sub $0x10,%rsp + │ struct rb_node *parent; + │ + │ if (RB_EMPTY_NODE(node)) + │ mov (%rdi),%rdx + │ return n; + + annotate.use_offset:: + Basing on a first address of a loaded function, offset can be used. + Instead of using original addresses of assembly code, + addresses subtracted from a base address can be printed. + Let's illustrate an example. + If a base address is 0XFFFFFFFF81624d50 as below, + + ffffffff81624d50 <load0> + + an address on assembly code has a specific absolute address as below + + ffffffff816250b8:│ mov 0x8(%r14),%rdi + + but if use_offset is 'true', an address subtracted from a base address is printed. + Default is true. This option is only applied to TUI. + + 368:│ mov 0x8(%r14),%rdi + + annotate.jump_arrows:: + There can be jump instruction among assembly code. + Depending on a boolean value of jump_arrows, + arrows can be printed or not which represent + where do the instruction jump into as below. + + │ ┌──jmp 1333 + │ │ xchg %ax,%ax + │1330:│ mov %r15,%r10 + │1333:└─→cmp %r15,%r14 + + If jump_arrow is 'false', the arrows isn't printed as below. + Default is 'false'. + + │ ↓ jmp 1333 + │ xchg %ax,%ax + │1330: mov %r15,%r10 + │1333: cmp %r15,%r14 + + annotate.show_linenr:: + When showing source code if this option is 'true', + line numbers are printed as below. + + │1628 if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │1628 data->id = *array; + │1629 array++; + │1630 } + + However if this option is 'false', they aren't printed as below. + Default is 'false'. + + │ if (type & PERF_SAMPLE_IDENTIFIER) { + │ ↓ jne 508 + │ data->id = *array; + │ array++; + │ } + + annotate.show_nr_jumps:: + Let's see a part of assembly code. + + │1382: movb $0x1,-0x270(%rbp) + + If use this, the number of branches jumping to that address can be printed as below. + Default is 'false'. + + │1 1382: movb $0x1,-0x270(%rbp) + + annotate.show_total_period:: + To compare two records on an instruction base, with this option + provided, display total number of samples that belong to a line + in assembly code. If this option is 'true', total periods are printed + instead of percent values as below. + + 302 │ mov %eax,%eax + + But if this option is 'false', percent values for overhead are printed i.e. + Default is 'false'. + + 99.93 │ mov %eax,%eax + +hist.*:: + hist.percentage:: + This option control the way to calculate overhead of filtered entries - + that means the value of this option is effective only if there's a + filter (by comm, dso or symbol name). Suppose a following example: + + Overhead Symbols + ........ ....... + 33.33% foo + 33.33% bar + 33.33% baz + + This is an original overhead and we'll filter out the first 'foo' + entry. The value of 'relative' would increase the overhead of 'bar' + and 'baz' to 50.00% for each, while 'absolute' would show their + current overhead (33.33%). + +ui.*:: + ui.show-headers:: + This option controls display of column headers (like 'Overhead' and 'Symbol') + in 'report' and 'top'. If this option is false, they are hidden. + This option is only applied to TUI. + +call-graph.*:: + When sub-commands 'top' and 'report' work with -g/—-children + there're options in control of call-graph. + + call-graph.record-mode:: + The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'. + The value of 'dwarf' is effective only if perf detect needed library + (libunwind or a recent version of libdw). + 'lbr' only work for cpus that support it. + + call-graph.dump-size:: + The size of stack to dump in order to do post-unwinding. Default is 8192 (byte). + When using dwarf into record-mode, the default size will be used if omitted. + + call-graph.print-type:: + The print-types can be graph (graph absolute), fractal (graph relative), + flat and folded. This option controls a way to show overhead for each callchain + entry. Suppose a following example. + + Overhead Symbols + ........ ....... + 40.00% foo + | + ---foo + | + |--50.00%--bar + | main + | + --50.00%--baz + main + + This output is a 'fractal' format. The 'foo' came from 'bar' and 'baz' exactly + half and half so 'fractal' shows 50.00% for each + (meaning that it assumes 100% total overhead of 'foo'). + + The 'graph' uses absolute overhead value of 'foo' as total so each of + 'bar' and 'baz' callchain will have 20.00% of overhead. + If 'flat' is used, single column and linear exposure of call chains. + 'folded' mean call chains are displayed in a line, separated by semicolons. + + call-graph.order:: + This option controls print order of callchains. The default is + 'callee' which means callee is printed at top and then followed by its + caller and so on. The 'caller' prints it in reverse order. + + If this option is not set and report.children or top.children is + set to true (or the equivalent command line option is given), + the default value of this option is changed to 'caller' for the + execution of 'perf report' or 'perf top'. Other commands will + still default to 'callee'. + + call-graph.sort-key:: + The callchains are merged if they contain same information. + The sort-key option determines a way to compare the callchains. + A value of 'sort-key' can be 'function' or 'address'. + The default is 'function'. + + call-graph.threshold:: + When there're many callchains it'd print tons of lines. So perf omits + small callchains under a certain overhead (threshold) and this option + control the threshold. Default is 0.5 (%). The overhead is calculated + by value depends on call-graph.print-type. + + call-graph.print-limit:: + This is a maximum number of lines of callchain printed for a single + histogram entry. Default is 0 which means no limitation. + +report.*:: + report.percent-limit:: + This one is mostly the same as call-graph.threshold but works for + histogram entries. Entries having an overhead lower than this + percentage will not be printed. Default is '0'. If percent-limit + is '10', only entries which have more than 10% of overhead will be + printed. + + report.queue-size:: + This option sets up the maximum allocation size of the internal + event queue for ordering events. Default is 0, meaning no limit. + + report.children:: + 'Children' means functions called from another function. + If this option is true, 'perf report' cumulates callchains of children + and show (accumulated) total overhead as well as 'Self' overhead. + Please refer to the 'perf report' manual. The default is 'true'. + + report.group:: + This option is to show event group information together. + Example output with this turned on, notice that there is one column + per event in the group, ref-cycles and cycles: + + # group: {ref-cycles,cycles} + # ======== + # + # Samples: 7K of event 'anon group { ref-cycles, cycles }' + # Event count (approx.): 6876107743 + # + # Overhead Command Shared Object Symbol + # ................ ....... ................. ................... + # + 99.84% 99.76% noploop noploop [.] main + 0.07% 0.00% noploop ld-2.15.so [.] strcmp + 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del + +top.*:: + top.children:: + Same as 'report.children'. So if it is enabled, the output of 'top' + command will have 'Children' overhead column as well as 'Self' overhead + column by default. + The default is 'true'. + +man.*:: + man.viewer:: + This option can assign a tool to view manual pages when 'help' + subcommand was invoked. Supported tools are 'man', 'woman' + (with emacs client) and 'konqueror'. Default is 'man'. + + New man viewer tool can be also added using 'man.<tool>.cmd' + or use different path using 'man.<tool>.path' config option. + +pager.*:: + pager.<subcommand>:: + When the subcommand is run on stdio, determine whether it uses + pager or not based on this value. Default is 'unspecified'. + +kmem.*:: + kmem.default:: + This option decides which allocator is to be analyzed if neither + '--slab' nor '--page' option is used. Default is 'slab'. + +record.*:: + record.build-id:: + This option can be 'cache', 'no-cache' or 'skip'. + 'cache' is to post-process data and save/update the binaries into + the build-id cache (in ~/.debug). This is the default. + But if this option is 'no-cache', it will not update the build-id cache. + 'skip' skips post-processing and does not update the cache. + +SEE ALSO +-------- +linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index 1ceb3700ffbb..6f7200fb85cf 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -32,6 +32,9 @@ OPTIONS --group:: Show event group information. +--trace-fields:: + Show tracepoint field names. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-list[1], diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 0b1cedeef895..87b2588d1cbd 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -53,6 +53,13 @@ include::itrace.txt[] --strip:: Use with --itrace to strip out non-synthesized events. +-j:: +--jit:: + Process jitdump files by injecting the mmap records corresponding to jitted + functions. This option also generates the ELF images for each jitted function + found in the jitdumps files captured in the input perf.data file. Use this option + if you are monitoring environment using JIT runtimes, such as Java, DART or V8. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e630a7d2c348..19aa17532a16 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -207,11 +207,23 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0- In per-thread mode with inheritance mode on (default), samples are captured only when the thread executes on the designated CPUs. Default is to monitor all CPUs. +-B:: +--no-buildid:: +Do not save the build ids of binaries in the perf.data files. This skips +post processing after recording, which sometimes makes the final step in +the recording process to take a long time, as it needs to process all +events looking for mmap records. The downside is that it can misresolve +symbols if the workload binaries used when recording get locally rebuilt +or upgraded, because the only key available in this case is the +pathname. You can also set the "record.build-id" config variable to +'skip to have this behaviour permanently. + -N:: --no-buildid-cache:: Do not update the buildid cache. This saves some overhead in situations where the information in the perf.data file (which includes buildids) -is sufficient. +is sufficient. You can also set the "record.build-id" config variable to +'no-cache' to have the same effect. -G name,...:: --cgroup name,...:: @@ -314,11 +326,26 @@ This option sets the time out limit. The default value is 500 ms. Record context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. ---clang-path:: +--clang-path=PATH:: Path to clang binary to use for compiling BPF scriptlets. +(enabled when BPF support is on) ---clang-opt:: +--clang-opt=OPTIONS:: Options passed to clang when compiling BPF scriptlets. +(enabled when BPF support is on) + +--vmlinux=PATH:: +Specify vmlinux path which has debuginfo. +(enabled when BPF prologue is on) + +--buildid-all:: +Record build-id of all DSOs regardless whether it's actually hit or not. + +--all-kernel:: +Configure all used events to run in kernel space. + +--all-user:: +Configure all used events to run in user space. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 5ce8da1e1256..12113992ac9d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -117,6 +117,46 @@ OPTIONS And default sort keys are changed to comm, dso_from, symbol_from, dso_to and symbol_to, see '--branch-stack'. + If the --mem-mode option is used, the following sort keys are also available + (incompatible with --branch-stack): + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. + + - symbol_daddr: name of data symbol being executed on at the time of sample + - dso_daddr: name of library or module containing the data being executed + on at the time of the sample + - locked: whether the bus was locked at the time of the sample + - tlb: type of tlb access for the data at the time of the sample + - mem: type of memory access for the data at the time of the sample + - snoop: type of snoop (if any) for the data at the time of the sample + - dcacheline: the cacheline the data address is on at the time of the sample + + And the default sort keys are changed to local_weight, mem, sym, dso, + symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. + + If the data file has tracepoint event(s), following (dynamic) sort keys + are also available: + trace, trace_fields, [<event>.]<field>[/raw] + + - trace: pretty printed trace output in a single column + - trace_fields: fields in tracepoints in separate columns + - <field name>: optional event and field name for a specific field + + The last form consists of event and field names. If event name is + omitted, it searches all events for matching field name. The matched + field will be shown only for the event has the field. The event name + supports substring match so user doesn't need to specify full subsystem + and event name everytime. For example, 'sched:sched_switch' event can + be shortened to 'switch' as long as it's not ambiguous. Also event can + be specified by its index (starting from 1) preceded by the '%'. + So '%1' is the first event, '%2' is the second, and so on. + + The field name can have '/raw' suffix which disables pretty printing + and shows raw field value like hex numbers. The --raw-trace option + has the same effect for all dynamic sort keys. + + The default sort keys are changed to 'trace' if all events in the data + file are tracepoint. + -F:: --fields=:: Specify output field - multiple keys can be specified in CSV format. @@ -127,22 +167,6 @@ OPTIONS By default, every sort keys not specified in -F will be appended automatically. - If --mem-mode option is used, following sort keys are also available - (incompatible with --branch-stack): - symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. - - - symbol_daddr: name of data symbol being executed on at the time of sample - - dso_daddr: name of library or module containing the data being executed - on at the time of sample - - locked: whether the bus was locked at the time of sample - - tlb: type of tlb access for the data at the time of sample - - mem: type of memory access for the data at the time of sample - - snoop: type of snoop (if any) for the data at the time of sample - - dcacheline: the cacheline the data address is on at the time of sample - - And default sort keys are changed to local_weight, mem, sym, dso, - symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. - -p:: --parent=<regex>:: A regex filter to identify parent. The parent is a caller of this @@ -170,17 +194,18 @@ OPTIONS Dump raw trace in ASCII. -g:: ---call-graph=<print_type,threshold[,print_limit],order,sort_key,branch>:: +--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>:: Display call chains using type, min percent threshold, print limit, - call order, sort key and branch. Note that ordering of parameters is not - fixed so any parement can be given in an arbitraty order. One exception - is the print_limit which should be preceded by threshold. + call order, sort key, optional branch and value. Note that ordering of + parameters is not fixed so any parement can be given in an arbitraty order. + One exception is the print_limit which should be preceded by threshold. print_type can be either: - flat: single column, linear exposure of call chains. - graph: use a graph tree, displaying absolute overhead rates. (default) - fractal: like graph, but displays relative rates. Each branch of the tree is considered as a new profiled object. + - folded: call chains are displayed in a line, separated by semicolons - none: disable call chain display. threshold is a percentage value which specifies a minimum percent to be @@ -204,6 +229,11 @@ OPTIONS - branch: include last branch information in callgraph when available. Usually more convenient to use --branch-history for this. + value can be: + - percent: diplay overhead percent (default) + - period: display event period + - count: display event count + --children:: Accumulate callchain of children to parent entry so that then can show up in the output. The output will have a new "Children" column @@ -321,7 +351,10 @@ OPTIONS --percent-limit:: Do not show entries which have an overhead under that percent. - (Default: 0). + (Default: 0). Note that this option also sets the percent limit (threshold) + of callchains. However the default value of callchain threshold is + different than the default value of hist entries. Please see the + --call-graph option for details. --percentage:: Determine how to display the overhead percentage of filtered entries. @@ -365,6 +398,12 @@ include::itrace.txt[] --socket-filter:: Only report the samples on the processor socket that match with this filter +--raw-trace:: + When displaying traceevent output, do not use print fmt or plugins. + +--hierarchy:: + Enable hierarchical output. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4e074a660826..04f23b404bbc 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -10,6 +10,8 @@ SYNOPSIS [verse] 'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command> 'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>] +'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>] +'perf stat' report [-i file] DESCRIPTION ----------- @@ -22,6 +24,11 @@ OPTIONS <command>...:: Any command you can specify in a shell. +record:: + See STAT RECORD. + +report:: + See STAT REPORT. -e:: --event=:: @@ -62,6 +69,14 @@ OPTIONS --scale:: scale/normalize counter values +-d:: +--detailed:: + print more detailed statistics, can be specified up to 3 times + + -d: detailed events, L1 and LLC data cache + -d -d: more detailed events, dTLB and iTLB events + -d -d -d: very detailed events, adding prefetch events + -r:: --repeat=<n>:: repeat command and print average + stddev (max: 100). 0 means forever. @@ -132,6 +147,10 @@ Print count deltas every N milliseconds (minimum: 10ms) The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. example: 'perf stat -I 1000 -e cycles -a sleep 5' +--metric-only:: +Only print computed metrics. Print them in a single line. +Don't show any raw values. Not supported with --per-thread. + --per-socket:: Aggregate counts per processor socket for system-wide mode measurements. This is a useful mode to detect imbalance between sockets. To enable this mode, @@ -159,6 +178,33 @@ filter out the startup phase of the program, which is often very different. Print statistics of transactional execution if supported. +STAT RECORD +----------- +Stores stat data into perf data file. + +-o file:: +--output file:: +Output file name. + +STAT REPORT +----------- +Reads and reports stat data from perf data file. + +-i file:: +--input file:: +Input file name. + +--per-socket:: +Aggregate counts per processor socket for system-wide mode measurements. + +--per-core:: +Aggregate counts per physical processor for system-wide mode measurements. + +-A:: +--no-aggr:: +Do not aggregate counts across all monitored CPUs. + + EXAMPLES -------- @@ -177,6 +223,29 @@ $ perf stat -- make -j Wall-clock time elapsed: 719.554352 msecs +CSV FORMAT +---------- + +With -x, perf stat is able to output a not-quite-CSV format output +Commas in the output are not put into "". To make it easy to parse +it is recommended to use a different character like -x \; + +The fields are in this order: + + - optional usec time stamp in fractions of second (with -I xxx) + - optional CPU, core, or socket identifier + - optional number of logical CPUs aggregated + - counter value + - unit of the counter value or empty + - event name + - run time of counter + - percentage of measurement time the counter was running + - optional variance if multiple values are collected with -r + - optional metric value + - optional unit of metric + +Additional metrics may be printed with all earlier fields being empty. + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 556cec09bf50..19f046f027cd 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -230,6 +230,12 @@ Default is to monitor all CPUS. The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k Note that this feature may not be available on all processors. +--raw-trace:: + When displaying traceevent output, do not use print fmt or plugins. + +--hierarchy:: + Enable hierarchy output. + INTERACTIVE PROMPTING KEYS -------------------------- diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7ea078658a87..13293de8869f 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -62,7 +62,6 @@ OPTIONS --verbose=:: Verbosity level. --i:: --no-inherit:: Child tasks do not inherit counters. diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example index 767ea2436e1c..1d8d5bc4cd2d 100644 --- a/tools/perf/Documentation/perfconfig.example +++ b/tools/perf/Documentation/perfconfig.example @@ -5,7 +5,7 @@ medium = green, lightgray normal = black, lightgray selected = lightgray, magenta - code = blue, lightgray + jump_arrows = blue, lightgray addr = magenta, lightgray [tui] diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt new file mode 100644 index 000000000000..5950b5a24efd --- /dev/null +++ b/tools/perf/Documentation/tips.txt @@ -0,0 +1,30 @@ +For a higher level overview, try: perf report --sort comm,dso +Sample related events with: perf record -e '{cycles,instructions}:S' +Compare performance results with: perf diff [<old file> <new file>] +Boolean options have negative forms, e.g.: perf report --no-children +Customize output of perf script with: perf script -F event,ip,sym +Generate a script for your data: perf script -g <lang> +Save output of perf stat using: perf stat record <target workload> +Create an archive with symtabs to analyse on other machine: perf archive +Search options using a keyword: perf report -h <keyword> +Use parent filter to see specific call path: perf report -p <regex> +List events using substring match: perf list <keyword> +To see list of saved events and attributes: perf evlist -v +Use --symfs <dir> if your symbol files are in non-standard locations +To see callchains in a more compact form: perf report -g folded +Show individual samples with: perf script +Limit to show entries above 5% only: perf report --percent-limit 5 +Profiling branch (mis)predictions with: perf record -b / perf report +Treat branches as callchains: perf report --branch-history +To count events in every 1000 msec: perf stat -I 1000 +Print event counts in CSV format with: perf stat -x, +If you have debuginfo enabled, try: perf report -s sym,srcline +For memory address profiling, try: perf mem record / perf mem report +For tracepoint events, try: perf report -s trace_fields +To record callchains for each sample: perf record -g +To record every process run by an user: perf record -u <user> +Skip collecing build-id when recording: perf record -B +To change sampling frequency to 100 Hz: perf record -F 100 +See assembly instructions with percentage: perf annotate <symbol> +If you prefer Intel style assembly, try: perf annotate -M intel +For hierarchical output, try: perf report --hierarchy diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 39c38cb45b00..2e1fa2357528 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,6 +1,7 @@ tools/perf tools/arch/alpha/include/asm/barrier.h tools/arch/arm/include/asm/barrier.h +tools/arch/arm64/include/asm/barrier.h tools/arch/ia64/include/asm/barrier.h tools/arch/mips/include/asm/barrier.h tools/arch/powerpc/include/asm/barrier.h @@ -20,14 +21,18 @@ tools/lib/traceevent tools/lib/bpf tools/lib/api tools/lib/bpf +tools/lib/subcmd tools/lib/hweight.c tools/lib/rbtree.c +tools/lib/string.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h -tools/lib/util/find_next_bit.c +tools/lib/find_bit.c +tools/lib/bitmap.c tools/include/asm/atomic.h tools/include/asm/barrier.h tools/include/asm/bug.h +tools/include/asm-generic/atomic-gcc.h tools/include/asm-generic/barrier.h tools/include/asm-generic/bitops/arch_hweight.h tools/include/asm-generic/bitops/atomic.h @@ -50,8 +55,10 @@ tools/include/linux/log2.h tools/include/linux/poison.h tools/include/linux/rbtree.h tools/include/linux/rbtree_augmented.h +tools/include/linux/string.h tools/include/linux/types.h tools/include/linux/err.h +tools/include/linux/bitmap.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 480546d5f13b..32a64e619028 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -68,6 +68,20 @@ all tags TAGS: $(print_msg) $(make) +ifdef MAKECMDGOALS +has_clean := 0 +ifneq ($(filter clean,$(MAKECMDGOALS)),) + has_clean := 1 +endif # clean + +ifeq ($(has_clean),1) + rest := $(filter-out clean,$(MAKECMDGOALS)) + ifneq ($(rest),) +$(rest): clean + endif # rest +endif # has_clean +endif # MAKECMDGOALS + # # The clean target is not really parallel, don't print the jobs info: # @@ -75,10 +89,17 @@ clean: $(make) # -# The build-test target is not really parallel, don't print the jobs info: +# The build-test target is not really parallel, don't print the jobs info, +# it also uses only the tests/make targets that don't pollute the source +# repository, i.e. that uses O= or builds the tarpkg outside the source +# repo directories. +# +# For a full test, use: +# +# make -C tools/perf -f tests/make # build-test: - @$(MAKE) -f tests/make --no-print-directory + @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out # # All other targets get passed through: diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0d19d5447d6c..4a4fad4182f5 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -58,6 +58,9 @@ include config/utilities.mak # # Define NO_LIBBIONIC if you do not want bionic support # +# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support +# used for generating build-ids for ELFs generated by jitdump. +# # Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support # for dwarf backtrace post unwind. # @@ -77,6 +80,9 @@ include config/utilities.mak # Define NO_AUXTRACE if you do not want AUX area tracing support # # Define NO_LIBBPF if you do not want BPF support +# +# Define FEATURES_DUMP to provide features detection dump file +# and bypass the feature detection # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL @@ -133,6 +139,8 @@ $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) $(call allow-override,LD,$(CROSS_COMPILE)ld) +LD += $(EXTRA_LDFLAGS) + PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f @@ -145,9 +153,10 @@ BISON = bison STRIP = strip AWK = awk -LIB_DIR = $(srctree)/tools/lib/api/ +LIB_DIR = $(srctree)/tools/lib/api/ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ -BPF_DIR = $(srctree)/tools/lib/bpf/ +BPF_DIR = $(srctree)/tools/lib/bpf/ +SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ # include config/Makefile by default and rule out # non-config cases @@ -161,10 +170,28 @@ ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),) endif endif +# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed. +# Without this setting the output feature dump file misses some features, for +# example, liberty. Select all checkers so we won't get an incomplete feature +# dump file. ifeq ($(config),1) +ifdef MAKECMDGOALS +ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump) +FEATURE_TESTS := all +endif +endif include config/Makefile endif +# The FEATURE_DUMP_EXPORT holds location of the actual +# FEATURE_DUMP file to be used to bypass feature detection +# (for bpf or any other subproject) +ifeq ($(FEATURES_DUMP),) +FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP) +else +FEATURE_DUMP_EXPORT := $(FEATURES_DUMP) +endif + export prefix bindir sharedir sysconfdir DESTDIR # sparse is architecture-neutral, which means that we need to tell it @@ -184,15 +211,17 @@ strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) TE_PATH=$(OUTPUT) BPF_PATH=$(OUTPUT) + SUBCMD_PATH=$(OUTPUT) ifneq ($(subdir),) - LIB_PATH=$(OUTPUT)/../lib/api/ + API_PATH=$(OUTPUT)/../lib/api/ else - LIB_PATH=$(OUTPUT) + API_PATH=$(OUTPUT) endif else TE_PATH=$(TRACE_EVENT_DIR) - LIB_PATH=$(LIB_DIR) + API_PATH=$(LIB_DIR) BPF_PATH=$(BPF_DIR) + SUBCMD_PATH=$(SUBCMD_DIR) endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a @@ -201,11 +230,13 @@ export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) -LIBAPI = $(LIB_PATH)libapi.a +LIBAPI = $(API_PATH)libapi.a export LIBAPI LIBBPF = $(BPF_PATH)libbpf.a +LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a + # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/ @@ -257,7 +288,7 @@ export PERL_PATH LIB_FILE=$(OUTPUT)libperf.a -PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) +PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) ifndef NO_LIBBPF PERFLIBS += $(LIBBPF) endif @@ -420,7 +451,7 @@ $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null -install-traceevent-plugins: $(LIBTRACEEVENT) +install-traceevent-plugins: libtraceevent_plugins $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins $(LIBAPI): fixdep FORCE @@ -431,12 +462,19 @@ $(LIBAPI)-clean: $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null $(LIBBPF): fixdep FORCE - $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a + $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null +$(LIBSUBCMD): fixdep FORCE + $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a + +$(LIBSUBCMD)-clean: + $(call QUIET_CLEAN, libsubcmd) + $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean + help: @echo 'Perf make targets:' @echo ' doc - make *all* documentation (see below)' @@ -476,7 +514,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html $(DOC_TARGETS): $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) -TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol ../include ../lib/bpf +TAG_FOLDERS= . ../lib ../include TAG_FILES= ../../include/uapi/linux/perf_event.h TAGS: @@ -555,6 +593,9 @@ endif $(call QUIET_INSTALL, perf_completion-script) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + $(call QUIET_INSTALL, perf-tip) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \ + $(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)' install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ @@ -582,19 +623,31 @@ $(INSTALL_DOC_TARGETS): # config-clean: $(call QUIET_CLEAN, config) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean config-clean +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) - $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ - $(OUTPUT)util/intel-pt-decoder/inat-tables.c + $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(OUTPUT)fixdep \ + $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) # +# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY) +# file if defined, with no further action. +feature-dump: +ifdef FEATURE_DUMP_COPY + @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY) + @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)" +else + @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP" +endif + +# # Trick: if ../../.git does not exist - we are building out of tree for example, # then force version regeneration: # diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 7fbca175099e..18b13518d8d8 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,3 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 7fbca175099e..56e05f126ad8 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,3 +1,6 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +HAVE_KVM_STAT_SUPPORT := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7b8b0d1a1b62..c8fe2074d217 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,6 @@ libperf-y += header.o libperf-y += sym-handling.o +libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h new file mode 100644 index 000000000000..0dd6b7f2d44f --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h @@ -0,0 +1,123 @@ +#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H +#define ARCH_PERF_BOOK3S_HV_HCALLS_H + +/* + * PowerPC HCALL codes : hcall code to name mapping + */ +#define kvm_trace_symbol_hcall \ + {0x4, "H_REMOVE"}, \ + {0x8, "H_ENTER"}, \ + {0xc, "H_READ"}, \ + {0x10, "H_CLEAR_MOD"}, \ + {0x14, "H_CLEAR_REF"}, \ + {0x18, "H_PROTECT"}, \ + {0x1c, "H_GET_TCE"}, \ + {0x20, "H_PUT_TCE"}, \ + {0x24, "H_SET_SPRG0"}, \ + {0x28, "H_SET_DABR"}, \ + {0x2c, "H_PAGE_INIT"}, \ + {0x30, "H_SET_ASR"}, \ + {0x34, "H_ASR_ON"}, \ + {0x38, "H_ASR_OFF"}, \ + {0x3c, "H_LOGICAL_CI_LOAD"}, \ + {0x40, "H_LOGICAL_CI_STORE"}, \ + {0x44, "H_LOGICAL_CACHE_LOAD"}, \ + {0x48, "H_LOGICAL_CACHE_STORE"}, \ + {0x4c, "H_LOGICAL_ICBI"}, \ + {0x50, "H_LOGICAL_DCBF"}, \ + {0x54, "H_GET_TERM_CHAR"}, \ + {0x58, "H_PUT_TERM_CHAR"}, \ + {0x5c, "H_REAL_TO_LOGICAL"}, \ + {0x60, "H_HYPERVISOR_DATA"}, \ + {0x64, "H_EOI"}, \ + {0x68, "H_CPPR"}, \ + {0x6c, "H_IPI"}, \ + {0x70, "H_IPOLL"}, \ + {0x74, "H_XIRR"}, \ + {0x78, "H_MIGRATE_DMA"}, \ + {0x7c, "H_PERFMON"}, \ + {0xdc, "H_REGISTER_VPA"}, \ + {0xe0, "H_CEDE"}, \ + {0xe4, "H_CONFER"}, \ + {0xe8, "H_PROD"}, \ + {0xec, "H_GET_PPP"}, \ + {0xf0, "H_SET_PPP"}, \ + {0xf4, "H_PURR"}, \ + {0xf8, "H_PIC"}, \ + {0xfc, "H_REG_CRQ"}, \ + {0x100, "H_FREE_CRQ"}, \ + {0x104, "H_VIO_SIGNAL"}, \ + {0x108, "H_SEND_CRQ"}, \ + {0x110, "H_COPY_RDMA"}, \ + {0x114, "H_REGISTER_LOGICAL_LAN"}, \ + {0x118, "H_FREE_LOGICAL_LAN"}, \ + {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"}, \ + {0x120, "H_SEND_LOGICAL_LAN"}, \ + {0x124, "H_BULK_REMOVE"}, \ + {0x130, "H_MULTICAST_CTRL"}, \ + {0x134, "H_SET_XDABR"}, \ + {0x138, "H_STUFF_TCE"}, \ + {0x13c, "H_PUT_TCE_INDIRECT"}, \ + {0x14c, "H_CHANGE_LOGICAL_LAN_MAC"}, \ + {0x150, "H_VTERM_PARTNER_INFO"}, \ + {0x154, "H_REGISTER_VTERM"}, \ + {0x158, "H_FREE_VTERM"}, \ + {0x15c, "H_RESET_EVENTS"}, \ + {0x160, "H_ALLOC_RESOURCE"}, \ + {0x164, "H_FREE_RESOURCE"}, \ + {0x168, "H_MODIFY_QP"}, \ + {0x16c, "H_QUERY_QP"}, \ + {0x170, "H_REREGISTER_PMR"}, \ + {0x174, "H_REGISTER_SMR"}, \ + {0x178, "H_QUERY_MR"}, \ + {0x17c, "H_QUERY_MW"}, \ + {0x180, "H_QUERY_HCA"}, \ + {0x184, "H_QUERY_PORT"}, \ + {0x188, "H_MODIFY_PORT"}, \ + {0x18c, "H_DEFINE_AQP1"}, \ + {0x190, "H_GET_TRACE_BUFFER"}, \ + {0x194, "H_DEFINE_AQP0"}, \ + {0x198, "H_RESIZE_MR"}, \ + {0x19c, "H_ATTACH_MCQP"}, \ + {0x1a0, "H_DETACH_MCQP"}, \ + {0x1a4, "H_CREATE_RPT"}, \ + {0x1a8, "H_REMOVE_RPT"}, \ + {0x1ac, "H_REGISTER_RPAGES"}, \ + {0x1b0, "H_DISABLE_AND_GETC"}, \ + {0x1b4, "H_ERROR_DATA"}, \ + {0x1b8, "H_GET_HCA_INFO"}, \ + {0x1bc, "H_GET_PERF_COUNT"}, \ + {0x1c0, "H_MANAGE_TRACE"}, \ + {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"}, \ + {0x1d8, "H_POLL_PENDING"}, \ + {0x1e4, "H_QUERY_INT_STATE"}, \ + {0x244, "H_ILLAN_ATTRIBUTES"}, \ + {0x250, "H_MODIFY_HEA_QP"}, \ + {0x254, "H_QUERY_HEA_QP"}, \ + {0x258, "H_QUERY_HEA"}, \ + {0x25c, "H_QUERY_HEA_PORT"}, \ + {0x260, "H_MODIFY_HEA_PORT"}, \ + {0x264, "H_REG_BCMC"}, \ + {0x268, "H_DEREG_BCMC"}, \ + {0x26c, "H_REGISTER_HEA_RPAGES"}, \ + {0x270, "H_DISABLE_AND_GET_HEA"}, \ + {0x274, "H_GET_HEA_INFO"}, \ + {0x278, "H_ALLOC_HEA_RESOURCE"}, \ + {0x284, "H_ADD_CONN"}, \ + {0x288, "H_DEL_CONN"}, \ + {0x298, "H_JOIN"}, \ + {0x2a4, "H_VASI_STATE"}, \ + {0x2b0, "H_ENABLE_CRQ"}, \ + {0x2b8, "H_GET_EM_PARMS"}, \ + {0x2d0, "H_SET_MPP"}, \ + {0x2d4, "H_GET_MPP"}, \ + {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"}, \ + {0x2f4, "H_BEST_ENERGY"}, \ + {0x2fc, "H_XIRR_X"}, \ + {0x300, "H_RANDOM"}, \ + {0x304, "H_COP"}, \ + {0x314, "H_GET_MPP_X"}, \ + {0x31c, "H_SET_MODE"}, \ + {0xf000, "H_RTAS"} \ + +#endif diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h new file mode 100644 index 000000000000..e68ba2da8970 --- /dev/null +++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h @@ -0,0 +1,33 @@ +#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H +#define ARCH_PERF_BOOK3S_HV_EXITS_H + +/* + * PowerPC Interrupt vectors : exit code to name mapping + */ + +#define kvm_trace_symbol_exit \ + {0x0, "RETURN_TO_HOST"}, \ + {0x100, "SYSTEM_RESET"}, \ + {0x200, "MACHINE_CHECK"}, \ + {0x300, "DATA_STORAGE"}, \ + {0x380, "DATA_SEGMENT"}, \ + {0x400, "INST_STORAGE"}, \ + {0x480, "INST_SEGMENT"}, \ + {0x500, "EXTERNAL"}, \ + {0x501, "EXTERNAL_LEVEL"}, \ + {0x502, "EXTERNAL_HV"}, \ + {0x600, "ALIGNMENT"}, \ + {0x700, "PROGRAM"}, \ + {0x800, "FP_UNAVAIL"}, \ + {0x900, "DECREMENTER"}, \ + {0x980, "HV_DECREMENTER"}, \ + {0xc00, "SYSCALL"}, \ + {0xd00, "TRACE"}, \ + {0xe00, "H_DATA_STORAGE"}, \ + {0xe20, "H_INST_STORAGE"}, \ + {0xe40, "H_EMUL_ASSIST"}, \ + {0xf00, "PERFMON"}, \ + {0xf20, "ALTIVEC"}, \ + {0xf40, "VSX"} + +#endif diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c new file mode 100644 index 000000000000..74eee30398f8 --- /dev/null +++ b/tools/perf/arch/powerpc/util/kvm-stat.c @@ -0,0 +1,170 @@ +#include "util/kvm-stat.h" +#include "util/parse-events.h" +#include "util/debug.h" + +#include "book3s_hv_exits.h" +#include "book3s_hcalls.h" + +#define NR_TPS 4 + +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 40; +const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter"; +const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit"; + +define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit); +define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall); + +/* Tracepoints specific to ppc_book3s_hv */ +const char *ppc_book3s_hv_kvm_tp[] = { + "kvm_hv:kvm_guest_enter", + "kvm_hv:kvm_guest_exit", + "kvm_hv:kvm_hcall_enter", + "kvm_hv:kvm_hcall_exit", + NULL, +}; + +/* 1 extra placeholder for NULL */ +const char *kvm_events_tp[NR_TPS + 1]; +const char *kvm_exit_reason; + +static void hcall_event_get_key(struct perf_evsel *evsel, + struct perf_sample *sample, + struct event_key *key) +{ + key->info = 0; + key->key = perf_evsel__intval(evsel, sample, "req"); +} + +static const char *get_hcall_exit_reason(u64 exit_code) +{ + struct exit_reasons_table *tbl = hcall_reasons; + + while (tbl->reason != NULL) { + if (tbl->exit_code == exit_code) + return tbl->reason; + tbl++; + } + + pr_debug("Unknown hcall code: %lld\n", + (unsigned long long)exit_code); + return "UNKNOWN"; +} + +static bool hcall_event_end(struct perf_evsel *evsel, + struct perf_sample *sample __maybe_unused, + struct event_key *key __maybe_unused) +{ + return (!strcmp(evsel->name, kvm_events_tp[3])); +} + +static bool hcall_event_begin(struct perf_evsel *evsel, + struct perf_sample *sample, struct event_key *key) +{ + if (!strcmp(evsel->name, kvm_events_tp[2])) { + hcall_event_get_key(evsel, sample, key); + return true; + } + + return false; +} +static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, + struct event_key *key, + char *decode) +{ + const char *hcall_reason = get_hcall_exit_reason(key->key); + + scnprintf(decode, decode_str_len, "%s", hcall_reason); +} + +static struct kvm_events_ops hcall_events = { + .is_begin_event = hcall_event_begin, + .is_end_event = hcall_event_end, + .decode_key = hcall_event_decode_key, + .name = "HCALL-EVENT", +}; + +static struct kvm_events_ops exit_events = { + .is_begin_event = exit_event_begin, + .is_end_event = exit_event_end, + .decode_key = exit_event_decode_key, + .name = "VM-EXIT" +}; + +struct kvm_reg_events_ops kvm_reg_events_ops[] = { + { .name = "vmexit", .ops = &exit_events }, + { .name = "hcall", .ops = &hcall_events }, + { NULL, NULL }, +}; + +const char * const kvm_skip_events[] = { + NULL, +}; + + +static int is_tracepoint_available(const char *str, struct perf_evlist *evlist) +{ + struct parse_events_error err; + int ret; + + err.str = NULL; + ret = parse_events(evlist, str, &err); + if (err.str) + pr_err("%s : %s\n", str, err.str); + return ret; +} + +static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm, + struct perf_evlist *evlist) +{ + const char **events_ptr; + int i, nr_tp = 0, err = -1; + + /* Check for book3s_hv tracepoints */ + for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) { + err = is_tracepoint_available(*events_ptr, evlist); + if (err) + return -1; + nr_tp++; + } + + for (i = 0; i < nr_tp; i++) + kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i]; + + kvm_events_tp[i] = NULL; + kvm_exit_reason = "trap"; + kvm->exit_reasons = hv_exit_reasons; + kvm->exit_reasons_isa = "HV"; + + return 0; +} + +/* Wrapper to setup kvm tracepoints */ +static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm) +{ + struct perf_evlist *evlist = perf_evlist__new(); + + if (evlist == NULL) + return -ENOMEM; + + /* Right now, only supported on book3s_hv */ + return ppc__setup_book3s_hv(kvm, evlist); +} + +int setup_kvm_events_tp(struct perf_kvm_stat *kvm) +{ + return ppc__setup_kvm_tp(kvm); +} + +int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused) +{ + int ret; + + ret = ppc__setup_kvm_tp(kvm); + if (ret) { + kvm->exit_reasons = NULL; + kvm->exit_reasons_isa = NULL; + } + + return ret; +} diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index a5dbc07ec9dc..ed57df2e6d68 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -10,7 +10,7 @@ */ #include "../../util/kvm-stat.h" -#include <asm/kvm_perf.h> +#include <asm/sie.h> define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes); @@ -18,6 +18,12 @@ define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes); define_exit_reasons_table(sie_diagnose_codes, diagnose_codes); define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes); +const char *vcpu_id_str = "id"; +const int decode_str_len = 40; +const char *kvm_exit_reason = "icptcode"; +const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter"; +const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit"; + static void event_icpt_insn_get_key(struct perf_evsel *evsel, struct perf_sample *sample, struct event_key *key) @@ -73,7 +79,7 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_s390_sie_enter", "kvm:kvm_s390_sie_exit", "kvm:kvm_s390_intercept_instruction", diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 09ba923debe8..269af2143735 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -3,3 +3,4 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 7ed00f4b0908..b48de2f5813c 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -2,10 +2,10 @@ #define ARCH_TESTS_H /* Tests */ -int test__rdpmc(void); -int test__perf_time_to_tsc(void); -int test__insn_x86(void); -int test__intel_cqm_count_nmi_context(void); +int test__rdpmc(int subtest); +int test__perf_time_to_tsc(int subtest); +int test__insn_x86(int subtest); +int test__intel_cqm_count_nmi_context(int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index b6115dfd28f0..08d9b2bc185c 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -171,7 +171,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64) * verbose (-v) option to see all the instructions and whether or not they * decoded successfuly. */ -int test__insn_x86(void) +int test__insn_x86(int subtest __maybe_unused) { int ret = 0; diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c index d28c1b6a3b54..7f064eb37158 100644 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ b/tools/perf/arch/x86/tests/intel-cqm.c @@ -17,7 +17,7 @@ static pid_t spawn(void) if (pid) return pid; - while(1); + while(1) sleep(5); return 0; } @@ -33,7 +33,7 @@ static pid_t spawn(void) * the last read counter value to avoid triggering a WARN_ON_ONCE() in * smp_call_function_many() caused by sending IPIs from NMI context. */ -int test__intel_cqm_count_nmi_context(void) +int test__intel_cqm_count_nmi_context(int subtest __maybe_unused) { struct perf_evlist *evlist = NULL; struct perf_evsel *evsel = NULL; @@ -54,7 +54,7 @@ int test__intel_cqm_count_nmi_context(void) ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL); if (ret) { - pr_debug("parse_events failed\n"); + pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n"); err = TEST_SKIP; goto out; } diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index 658cd200af74..9d29ee283ac5 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -35,13 +35,12 @@ * %0 is returned, otherwise %-1 is returned. If TSC conversion is not * supported then then the test passes but " (not supported)" is printed. */ -int test__perf_time_to_tsc(void) +int test__perf_time_to_tsc(int subtest __maybe_unused) { struct record_opts opts = { .mmap_pages = UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, - .freq = 4000, .target = { .uses_mmap = true, }, diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c index e7688214c7cf..72193f19d6d7 100644 --- a/tools/perf/arch/x86/tests/rdpmc.c +++ b/tools/perf/arch/x86/tests/rdpmc.c @@ -59,7 +59,7 @@ static u64 mmap_read_self(void *addr) u64 quot, rem; quot = (cyc >> time_shift); - rem = cyc & ((1 << time_shift) - 1); + rem = cyc & (((u64)1 << time_shift) - 1); delta = time_offset + quot * time_mult + ((rem * time_mult) >> time_shift); @@ -103,6 +103,7 @@ static int __test__rdpmc(void) sigfillset(&sa.sa_mask); sa.sa_sigaction = segfault_handler; + sa.sa_flags = 0; sigaction(SIGSEGV, &sa, NULL); fd = sys_perf_event_open(&attr, 0, -1, -1, @@ -149,7 +150,7 @@ out_close: return 0; } -int test__rdpmc(void) +int test__rdpmc(int subtest __maybe_unused) { int status = 0; int wret = 0; diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index ff63649fa9ac..465970370f3e 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -5,6 +5,7 @@ libperf-y += kvm-stat.o libperf-y += perf_regs.o libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 9b94ce520917..d66f9ad4df2e 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -60,7 +60,9 @@ struct branch { u64 misc; }; -static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_BTS_AUXTRACE_PRIV_SIZE; } @@ -327,7 +329,7 @@ static int intel_bts_snapshot_start(struct auxtrace_record *itr) evlist__for_each(btsr->evlist, evsel) { if (evsel->attr.type == btsr->intel_bts_pmu->type) - return perf_evlist__disable_event(btsr->evlist, evsel); + return perf_evsel__disable(evsel); } return -EINVAL; } @@ -340,7 +342,7 @@ static int intel_bts_snapshot_finish(struct auxtrace_record *itr) evlist__for_each(btsr->evlist, evsel) { if (evsel->attr.type == btsr->intel_bts_pmu->type) - return perf_evlist__enable_event(btsr->evlist, evsel); + return perf_evsel__enable(evsel); } return -EINVAL; } diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index b02af064f0f9..a3395179c9ee 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -26,7 +26,7 @@ #include "../../util/evlist.h" #include "../../util/evsel.h" #include "../../util/cpumap.h" -#include "../../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../../util/parse-events.h" #include "../../util/pmu.h" #include "../../util/debug.h" @@ -89,7 +89,7 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats, *config = attr.config; out_free: - parse_events__free_terms(terms); + parse_events_terms__delete(terms); return err; } @@ -273,7 +273,9 @@ intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) return attr; } -static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) +static size_t +intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) { return INTEL_PT_AUXTRACE_PRIV_SIZE; } @@ -725,7 +727,7 @@ static int intel_pt_snapshot_start(struct auxtrace_record *itr) evlist__for_each(ptr->evlist, evsel) { if (evsel->attr.type == ptr->intel_pt_pmu->type) - return perf_evlist__disable_event(ptr->evlist, evsel); + return perf_evsel__disable(evsel); } return -EINVAL; } @@ -738,7 +740,7 @@ static int intel_pt_snapshot_finish(struct auxtrace_record *itr) evlist__for_each(ptr->evlist, evsel) { if (evsel->attr.type == ptr->intel_pt_pmu->type) - return perf_evlist__enable_event(ptr->evlist, evsel); + return perf_evsel__enable(evsel); } return -EINVAL; } diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 14e4e668fad7..b63d4be655a2 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c @@ -1,5 +1,7 @@ #include "../../util/kvm-stat.h" -#include <asm/kvm_perf.h> +#include <asm/svm.h> +#include <asm/vmx.h> +#include <asm/kvm.h> define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS); define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS); @@ -11,6 +13,12 @@ static struct kvm_events_ops exit_events = { .name = "VM-EXIT" }; +const char *vcpu_id_str = "vcpu_id"; +const int decode_str_len = 20; +const char *kvm_exit_reason = "exit_reason"; +const char *kvm_entry_trace = "kvm:kvm_entry"; +const char *kvm_exit_trace = "kvm:kvm_exit"; + /* * For the mmio events, we treat: * the time of MMIO write: kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry @@ -65,7 +73,7 @@ static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#lx:%s", + scnprintf(decode, decode_str_len, "%#lx:%s", (unsigned long)key->key, key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R"); } @@ -109,7 +117,7 @@ static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, struct event_key *key, char *decode) { - scnprintf(decode, DECODE_STR_LEN, "%#llx:%s", + scnprintf(decode, decode_str_len, "%#llx:%s", (unsigned long long)key->key, key->info ? "POUT" : "PIN"); } @@ -121,7 +129,7 @@ static struct kvm_events_ops ioport_events = { .name = "IO Port Access" }; -const char * const kvm_events_tp[] = { +const char *kvm_events_tp[] = { "kvm:kvm_entry", "kvm:kvm_exit", "kvm:kvm_mmio", diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fc9bebd2cca0..0999ac536d86 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -11,7 +11,7 @@ #include "../perf.h" #include "../util/util.h" #include "../util/stat.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/header.h" #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index bc6a16adbca8..6a18ce21f865 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -5,7 +5,7 @@ #include "../perf.h" #include "../util/util.h" #include "../util/stat.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/header.h" #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index ad0d9b5342fb..718238683013 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -11,7 +11,7 @@ #include "../perf.h" #include "../util/util.h" #include "../util/stat.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/header.h" #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 6d8c9fa2a16c..91aaf2a1fa90 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -10,7 +10,7 @@ #include "../perf.h" #include "../util/util.h" #include "../util/stat.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/header.h" #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index e5e41d3bdce7..f416bd705f66 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -11,7 +11,7 @@ #include "../perf.h" #include "../util/util.h" #include "../util/stat.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/header.h" #include "bench.h" #include "futex.h" diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 9419b944220f..a91aa85d80ff 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -8,7 +8,7 @@ #include "../perf.h" #include "../util/util.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/header.h" #include "../util/cloexec.h" #include "bench.h" diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index e4c2c30143b9..5c3cce082cb8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,6 +1,11 @@ + +/* Various wrappers to make the kernel .S file build in user-space: */ + #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl +#define _ASM_EXTABLE_FAULT(x, y) + #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 492df2752a2d..5049d6357a46 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -7,7 +7,7 @@ #include "../perf.h" #include "../builtin.h" #include "../util/util.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../util/cloexec.h" #include "bench.h" diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index d4ff1b539cfd..bfaf9503de8e 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -11,7 +11,7 @@ #include "../perf.h" #include "../util/util.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../builtin.h" #include "bench.h" diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 005cc283790c..1dc2d13cc272 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -10,7 +10,7 @@ */ #include "../perf.h" #include "../util/util.h" -#include "../util/parse-options.h" +#include <subcmd/parse-options.h> #include "../builtin.h" #include "bench.h" diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2bf9b3fd9e61..cfe366375c4b 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -21,7 +21,7 @@ #include "util/evsel.h" #include "util/annotate.h" #include "util/event.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/thread.h" #include "util/sort.h" @@ -47,7 +47,7 @@ struct perf_annotate { }; static int perf_evsel__add_sample(struct perf_evsel *evsel, - struct perf_sample *sample __maybe_unused, + struct perf_sample *sample, struct addr_location *al, struct perf_annotate *ann) { @@ -72,7 +72,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, return 0; } - he = __hists__add_entry(hists, al, NULL, NULL, NULL, 1, 1, 0, true); + sample->period = 1; + sample->weight = 1; + + he = __hists__add_entry(hists, al, NULL, NULL, NULL, sample, true); if (he == NULL) return -ENOMEM; @@ -242,7 +245,7 @@ static int __cmd_annotate(struct perf_annotate *ann) hists__collapse_resort(hists, NULL); /* Don't sort callchain */ perf_evsel__reset_sample_bit(pos, CALLCHAIN); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(pos, NULL); if (symbol_conf.event_group && !perf_evsel__is_group_leader(pos)) @@ -343,18 +346,19 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) return ret; argc = parse_options(argc, argv, options, annotate_usage, 0); + if (argc) { + /* + * Special case: if there's an argument left then assume that + * it's a symbol filter: + */ + if (argc > 1) + usage_with_options(annotate_usage, options); - if (annotate.use_stdio) - use_browser = 0; - else if (annotate.use_tui) - use_browser = 1; - else if (annotate.use_gtk) - use_browser = 2; + annotate.sym_hist_filter = argv[0]; + } file.path = input_name; - setup_browser(true); - annotate.session = perf_session__new(&file, false, &annotate.tool); if (annotate.session == NULL) return -1; @@ -366,19 +370,17 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) if (ret < 0) goto out_delete; - if (setup_sorting() < 0) + if (setup_sorting(NULL) < 0) usage_with_options(annotate_usage, options); - if (argc) { - /* - * Special case: if there's an argument left then assume that - * it's a symbol filter: - */ - if (argc > 1) - usage_with_options(annotate_usage, options); + if (annotate.use_stdio) + use_browser = 0; + else if (annotate.use_tui) + use_browser = 1; + else if (annotate.use_gtk) + use_browser = 2; - annotate.sym_hist_filter = argv[0]; - } + setup_browser(true); ret = __cmd_annotate(&annotate); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b17aed36ca16..a1cddc6bbf0f 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -16,7 +16,7 @@ */ #include "perf.h" #include "util/util.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "builtin.h" #include "bench/bench.h" diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 7b8450cd33c2..632efc6b79a0 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -16,7 +16,7 @@ #include "util/cache.h" #include "util/debug.h" #include "util/header.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/strlist.h" #include "util/build-id.h" #include "util/session.h" @@ -38,19 +38,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) static int build_id_cache__kcore_dir(char *dir, size_t sz) { - struct timeval tv; - struct tm tm; - char dt[32]; - - if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) - return -1; - - if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) - return -1; - - scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); - - return 0; + return fetch_current_timestamp(dir, sz); } static bool same_kallsyms_reloc(const char *from_dir, char *to_dir) diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 918b4de29de4..5e914ee79eb3 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -12,7 +12,7 @@ #include "util/build-id.h" #include "util/cache.h" #include "util/debug.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/session.h" #include "util/symbol.h" #include "util/data.h" @@ -110,7 +110,7 @@ int cmd_buildid_list(int argc, const char **argv, setup_pager(); if (show_kernel) - return sysfs__fprintf_build_id(stdout); + return !(sysfs__fprintf_build_id(stdout) > 0); return perf_session__list_build_ids(force, with_hits); } diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c new file mode 100644 index 000000000000..c42448ed5dfe --- /dev/null +++ b/tools/perf/builtin-config.c @@ -0,0 +1,87 @@ +/* + * builtin-config.c + * + * Copyright (C) 2015, Taeung Song <treeze.taeung@gmail.com> + * + */ +#include "builtin.h" + +#include "perf.h" + +#include "util/cache.h" +#include <subcmd/parse-options.h> +#include "util/util.h" +#include "util/debug.h" + +static bool use_system_config, use_user_config; + +static const char * const config_usage[] = { + "perf config [<file-option>] [options]", + NULL +}; + +enum actions { + ACTION_LIST = 1 +} actions; + +static struct option config_options[] = { + OPT_SET_UINT('l', "list", &actions, + "show current config variables", ACTION_LIST), + OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"), + OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"), + OPT_END() +}; + +static int show_config(const char *key, const char *value, + void *cb __maybe_unused) +{ + if (value) + printf("%s=%s\n", key, value); + else + printf("%s\n", key); + + return 0; +} + +int cmd_config(int argc, const char **argv, const char *prefix __maybe_unused) +{ + int ret = 0; + char *user_config = mkpath("%s/.perfconfig", getenv("HOME")); + + argc = parse_options(argc, argv, config_options, config_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (use_system_config && use_user_config) { + pr_err("Error: only one config file at a time\n"); + parse_options_usage(config_usage, config_options, "user", 0); + parse_options_usage(NULL, config_options, "system", 0); + return -1; + } + + if (use_system_config) + config_exclusive_filename = perf_etc_perfconfig(); + else if (use_user_config) + config_exclusive_filename = user_config; + + switch (actions) { + case ACTION_LIST: + if (argc) { + pr_err("Error: takes no arguments\n"); + parse_options_usage(config_usage, config_options, "l", 1); + } else { + ret = perf_config(show_config, NULL); + if (ret < 0) { + const char * config_filename = config_exclusive_filename; + if (!config_exclusive_filename) + config_filename = user_config; + pr_err("Nothing configured, " + "please check your %s \n", config_filename); + } + } + break; + default: + usage_with_options(config_usage, config_options); + } + + return ret; +} diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c index d6525bc54d13..b97bc1518b44 100644 --- a/tools/perf/builtin-data.c +++ b/tools/perf/builtin-data.c @@ -2,7 +2,7 @@ #include "builtin.h" #include "perf.h" #include "debug.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include "data-convert-bt.h" typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 0b180a885ba3..4d72359fd15a 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -311,11 +311,11 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair, } static int hists__add_entry(struct hists *hists, - struct addr_location *al, u64 period, - u64 weight, u64 transaction) + struct addr_location *al, + struct perf_sample *sample) { - if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, - transaction, true) != NULL) + if (__hists__add_entry(hists, al, NULL, NULL, NULL, + sample, true) != NULL) return 0; return -ENOMEM; } @@ -336,8 +336,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } - if (hists__add_entry(hists, &al, sample->period, - sample->weight, sample->transaction)) { + if (hists__add_entry(hists, &al, sample)) { pr_warning("problem incrementing symbol period, skipping event\n"); goto out_put; } @@ -1208,7 +1207,7 @@ static int ui_init(void) BUG_ON(1); } - list_add(&fmt->sort_list, &perf_hpp__sort_list); + perf_hpp__register_sort_field(fmt); return 0; } @@ -1265,8 +1264,6 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) if (ret < 0) return ret; - perf_config(perf_default_config, NULL); - argc = parse_options(argc, argv, options, diff_usage, 0); if (symbol__init(NULL) < 0) @@ -1280,7 +1277,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused) sort__mode = SORT_MODE__DIFF; - if (setup_sorting() < 0) + if (setup_sorting(NULL) < 0) usage_with_options(diff_usage, options); setup_pager(); diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index f4d62510acbb..8a31f511e1a0 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -12,7 +12,7 @@ #include "util/evlist.h" #include "util/evsel.h" #include "util/parse-events.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/session.h" #include "util/data.h" #include "util/debug.h" @@ -26,14 +26,22 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details .mode = PERF_DATA_MODE_READ, .force = details->force, }; + bool has_tracepoint = false; session = perf_session__new(&file, 0, NULL); if (session == NULL) return -1; - evlist__for_each(session->evlist, pos) + evlist__for_each(session->evlist, pos) { perf_evsel__fprintf(pos, details, stdout); + if (pos->attr.type == PERF_TYPE_TRACEPOINT) + has_tracepoint = true; + } + + if (has_tracepoint && !details->trace_fields) + printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n"); + perf_session__delete(session); return 0; } @@ -49,6 +57,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('g', "group", &details.event_group, "Show event group information"), OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"), + OPT_BOOLEAN(0, "trace-fields", &details.trace_fields, "Show tracepoint fields"), OPT_END() }; const char * const evlist_usage[] = { diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index a7d588bf3cdd..49d55e21b1b0 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -6,11 +6,11 @@ #include "perf.h" #include "util/cache.h" #include "builtin.h" -#include "util/exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "common-cmds.h" -#include "util/parse-options.h" -#include "util/run-command.h" -#include "util/help.h" +#include <subcmd/parse-options.h> +#include <subcmd/run-command.h> +#include <subcmd/help.h> #include "util/debug.h" static struct man_viewer_list { @@ -86,8 +86,7 @@ static int check_emacsclient_version(void) return -1; } - strbuf_remove(&buffer, 0, strlen("emacsclient")); - version = atoi(buffer.buf); + version = atoi(buffer.buf + strlen("emacsclient")); if (version < 22) { fprintf(stderr, @@ -273,7 +272,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) if (!prefixcmp(var, "man.")) return add_man_viewer_info(var, value); - return perf_default_config(var, value, cb); + return 0; } static struct cmdnames main_cmds, other_cmds; @@ -407,7 +406,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page) #ifndef open_html static void open_html(const char *path) { - execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL); + execl_cmd("web--browse", "-c", "help.browser", path, NULL); } #endif diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 0a945d2e8ca5..7fa68663ed72 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -17,8 +17,9 @@ #include "util/build-id.h" #include "util/data.h" #include "util/auxtrace.h" +#include "util/jit.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include <linux/list.h> @@ -29,6 +30,7 @@ struct perf_inject { bool sched_stat; bool have_auxtrace; bool strip; + bool jit_mode; const char *input_name; struct perf_data_file output; u64 bytes_written; @@ -71,6 +73,15 @@ static int perf_event__repipe_oe_synth(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +#ifdef HAVE_JITDUMP +static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct ordered_events *oe __maybe_unused) +{ + return 0; +} +#endif + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -234,6 +245,31 @@ static int perf_event__repipe_mmap(struct perf_tool *tool, return err; } +#ifdef HAVE_JITDUMP +static int perf_event__jit_repipe_mmap(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + int ret; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + ret = jit_process(inject->session, &inject->output, machine, + event->mmap.filename, sample->pid, &n); + if (ret < 0) + return ret; + if (ret) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_mmap2(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -247,6 +283,31 @@ static int perf_event__repipe_mmap2(struct perf_tool *tool, return err; } +#ifdef HAVE_JITDUMP +static int perf_event__jit_repipe_mmap2(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, tool); + u64 n = 0; + int ret; + + /* + * if jit marker, then inject jit mmaps and generate ELF images + */ + ret = jit_process(inject->session, &inject->output, machine, + event->mmap2.filename, sample->pid, &n); + if (ret < 0) + return ret; + if (ret) { + inject->bytes_written += n; + return 0; + } + return perf_event__repipe_mmap2(tool, event, sample, machine); +} +#endif + static int perf_event__repipe_fork(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -626,12 +687,16 @@ static int __cmd_inject(struct perf_inject *inject) ret = perf_session__process_events(session); if (!file_out->is_pipe) { - if (inject->build_ids) { + if (inject->build_ids) perf_header__set_feat(&session->header, HEADER_BUILD_ID); - if (inject->have_auxtrace) - dsos__hit_all(session); - } + /* + * Keep all buildids when there is unprocessed AUX data because + * it is not known which ones the AUX trace hits. + */ + if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) && + inject->have_auxtrace && !inject->itrace_synth_opts.set) + dsos__hit_all(session); /* * The AUX areas have been removed and replaced with * synthesized hardware events, so clear the feature flag and @@ -675,6 +740,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .fork = perf_event__repipe, .exit = perf_event__repipe, .lost = perf_event__repipe, + .lost_samples = perf_event__repipe, .aux = perf_event__repipe, .itrace_start = perf_event__repipe, .context_switch = perf_event__repipe, @@ -702,7 +768,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) }; int ret; - const struct option options[] = { + struct option options[] = { OPT_BOOLEAN('b', "build-ids", &inject.build_ids, "Inject build-ids into the output stream"), OPT_STRING('i', "input", &inject.input_name, "file", @@ -712,6 +778,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, "Merge sched-stat and sched-switch for getting events " "where and how long tasks slept"), +#ifdef HAVE_JITDUMP + OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"), +#endif OPT_INCR('v', "verbose", &verbose, "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", @@ -728,7 +797,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "perf inject [<options>]", NULL }; - +#ifndef HAVE_JITDUMP + set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true); +#endif argc = parse_options(argc, argv, options, inject_usage, 0); /* @@ -754,6 +825,29 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) if (inject.session == NULL) return -1; + if (inject.build_ids) { + /* + * to make sure the mmap records are ordered correctly + * and so that the correct especially due to jitted code + * mmaps. We cannot generate the buildid hit list and + * inject the jit mmaps at the same time for now. + */ + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + } +#ifdef HAVE_JITDUMP + if (inject.jit_mode) { + inject.tool.mmap2 = perf_event__jit_repipe_mmap2; + inject.tool.mmap = perf_event__jit_repipe_mmap; + inject.tool.ordered_events = true; + inject.tool.ordering_requires_timestamps = true; + /* + * JIT MMAP injection injects all MMAP events in one go, so it + * does not obey finished_round semantics. + */ + inject.tool.finished_round = perf_event__drop_oe; + } +#endif ret = symbol__init(&inject.session->header.env); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 93ce665f976f..c9cb3be47cff 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -12,7 +12,7 @@ #include "util/tool.h" #include "util/callchain.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/data.h" #include "util/cpumap.h" @@ -602,7 +602,7 @@ static int gfpcmp(const void *a, const void *b) return fa->flags - fb->flags; } -/* see include/trace/events/gfpflags.h */ +/* see include/trace/events/mmflags.h */ static const struct { const char *original; const char *compact; @@ -612,30 +612,39 @@ static const struct { { "GFP_HIGHUSER", "HU" }, { "GFP_USER", "U" }, { "GFP_TEMPORARY", "TMP" }, + { "GFP_KERNEL_ACCOUNT", "KAC" }, { "GFP_KERNEL", "K" }, { "GFP_NOFS", "NF" }, { "GFP_ATOMIC", "A" }, { "GFP_NOIO", "NI" }, - { "GFP_HIGH", "H" }, - { "GFP_WAIT", "W" }, - { "GFP_IO", "I" }, - { "GFP_COLD", "CO" }, - { "GFP_NOWARN", "NWR" }, - { "GFP_REPEAT", "R" }, - { "GFP_NOFAIL", "NF" }, - { "GFP_NORETRY", "NR" }, - { "GFP_COMP", "C" }, - { "GFP_ZERO", "Z" }, - { "GFP_NOMEMALLOC", "NMA" }, - { "GFP_MEMALLOC", "MA" }, - { "GFP_HARDWALL", "HW" }, - { "GFP_THISNODE", "TN" }, - { "GFP_RECLAIMABLE", "RC" }, - { "GFP_MOVABLE", "M" }, - { "GFP_NOTRACK", "NT" }, - { "GFP_NO_KSWAPD", "NK" }, - { "GFP_OTHER_NODE", "ON" }, { "GFP_NOWAIT", "NW" }, + { "GFP_DMA", "D" }, + { "__GFP_HIGHMEM", "HM" }, + { "GFP_DMA32", "D32" }, + { "__GFP_HIGH", "H" }, + { "__GFP_ATOMIC", "_A" }, + { "__GFP_IO", "I" }, + { "__GFP_FS", "F" }, + { "__GFP_COLD", "CO" }, + { "__GFP_NOWARN", "NWR" }, + { "__GFP_REPEAT", "R" }, + { "__GFP_NOFAIL", "NF" }, + { "__GFP_NORETRY", "NR" }, + { "__GFP_COMP", "C" }, + { "__GFP_ZERO", "Z" }, + { "__GFP_NOMEMALLOC", "NMA" }, + { "__GFP_MEMALLOC", "MA" }, + { "__GFP_HARDWALL", "HW" }, + { "__GFP_THISNODE", "TN" }, + { "__GFP_RECLAIMABLE", "RC" }, + { "__GFP_MOVABLE", "M" }, + { "__GFP_ACCOUNT", "AC" }, + { "__GFP_NOTRACK", "NT" }, + { "__GFP_WRITE", "WR" }, + { "__GFP_RECLAIM", "R" }, + { "__GFP_DIRECT_RECLAIM", "DR" }, + { "__GFP_KSWAPD_RECLAIM", "KR" }, + { "__GFP_OTHER_NODE", "ON" }, }; static size_t max_gfp_len; @@ -1834,7 +1843,7 @@ static int __cmd_record(int argc, const char **argv) return cmd_record(i, rec_argv, NULL); } -static int kmem_config(const char *var, const char *value, void *cb) +static int kmem_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "kmem.default")) { if (!strcmp(value, "slab")) @@ -1847,7 +1856,7 @@ static int kmem_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index dd94b4ca2213..bff666458b28 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -10,7 +10,7 @@ #include "util/header.h" #include "util/session.h" #include "util/intlist.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/debug.h" #include "util/tool.h" @@ -30,7 +30,6 @@ #include <math.h> #ifdef HAVE_KVM_STAT_SUPPORT -#include <asm/kvm_perf.h> #include "util/kvm-stat.h" void exit_event_get_key(struct perf_evsel *evsel, @@ -38,12 +37,12 @@ void exit_event_get_key(struct perf_evsel *evsel, struct event_key *key) { key->info = 0; - key->key = perf_evsel__intval(evsel, sample, KVM_EXIT_REASON); + key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason); } bool kvm_exit_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_EXIT_TRACE); + return !strcmp(evsel->name, kvm_exit_trace); } bool exit_event_begin(struct perf_evsel *evsel, @@ -59,7 +58,7 @@ bool exit_event_begin(struct perf_evsel *evsel, bool kvm_entry_event(struct perf_evsel *evsel) { - return !strcmp(evsel->name, KVM_ENTRY_TRACE); + return !strcmp(evsel->name, kvm_entry_trace); } bool exit_event_end(struct perf_evsel *evsel, @@ -91,7 +90,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, const char *exit_reason = get_exit_reason(kvm, key->exit_reasons, key->key); - scnprintf(decode, DECODE_STR_LEN, "%s", exit_reason); + scnprintf(decode, decode_str_len, "%s", exit_reason); } static bool register_kvm_events_ops(struct perf_kvm_stat *kvm) @@ -357,7 +356,7 @@ static bool handle_end_event(struct perf_kvm_stat *kvm, time_diff = sample->time - time_begin; if (kvm->duration && time_diff > kvm->duration) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; kvm->events_ops->decode_key(kvm, &event->key, decode); if (!skip_event(decode)) { @@ -385,7 +384,8 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread, return NULL; } - vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, VCPU_ID); + vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample, + vcpu_id_str); thread__set_priv(thread, vcpu_record); } @@ -574,7 +574,7 @@ static void show_timeofday(void) static void print_result(struct perf_kvm_stat *kvm) { - char decode[DECODE_STR_LEN]; + char decode[decode_str_len]; struct kvm_event *event; int vcpu = kvm->trace_vcpu; @@ -585,7 +585,7 @@ static void print_result(struct perf_kvm_stat *kvm) pr_info("\n\n"); print_vcpu_info(kvm); - pr_info("%*s ", DECODE_STR_LEN, kvm->events_ops->name); + pr_info("%*s ", decode_str_len, kvm->events_ops->name); pr_info("%10s ", "Samples"); pr_info("%9s ", "Samples%"); @@ -604,7 +604,7 @@ static void print_result(struct perf_kvm_stat *kvm) min = get_event_min(event, vcpu); kvm->events_ops->decode_key(kvm, &event->key, decode); - pr_info("%*s ", DECODE_STR_LEN, decode); + pr_info("%*s ", decode_str_len, decode); pr_info("%10llu ", (unsigned long long)ecount); pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100); pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100); @@ -1132,6 +1132,11 @@ exit: _p; \ }) +int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused) +{ + return 0; +} + static int kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) { @@ -1148,7 +1153,14 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv) NULL }; const char * const *events_tp; + int ret; + events_tp_size = 0; + ret = setup_kvm_events_tp(kvm); + if (ret < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return ret; + } for (events_tp = kvm_events_tp; *events_tp; events_tp++) events_tp_size++; @@ -1351,7 +1363,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, disable_buildid_cache(); use_browser = 0; - setup_browser(false); if (argc) { argc = parse_options(argc, argv, live_options, @@ -1378,6 +1389,12 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, /* * generate the event list */ + err = setup_kvm_events_tp(kvm); + if (err < 0) { + pr_err("Unable to setup the kvm tracepoints\n"); + return err; + } + kvm->evlist = kvm_live_event_list(); if (kvm->evlist == NULL) { err = -1; @@ -1409,8 +1426,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, err = kvm_events_live_report(kvm); out: - exit_browser(0); - if (kvm->session) perf_session__delete(kvm->session); kvm->session = NULL; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index bf679e2c978b..5e22db4684b8 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -14,7 +14,7 @@ #include "util/parse-events.h" #include "util/cache.h" #include "util/pmu.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) { diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index de16aaed516e..ce3bfb48b26f 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -9,7 +9,7 @@ #include "util/thread.h" #include "util/header.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/debug.h" diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 80170aace5d4..88aeac9aa1da 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -1,11 +1,13 @@ #include "builtin.h" #include "perf.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" #include "util/data.h" +#include "util/mem-events.h" +#include "util/debug.h" #define MEM_OPERATION_LOAD 0x1 #define MEM_OPERATION_STORE 0x2 @@ -21,11 +23,56 @@ struct perf_mem { DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; +static int parse_record_events(const struct option *opt, + const char *str, int unset __maybe_unused) +{ + struct perf_mem *mem = *(struct perf_mem **)opt->value; + int j; + + if (strcmp(str, "list")) { + if (!perf_mem_events__parse(str)) { + mem->operation = 0; + return 0; + } + exit(-1); + } + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + fprintf(stderr, "%-13s%-*s%s\n", + e->tag, + verbose ? 25 : 0, + verbose ? perf_mem_events__name(j) : "", + e->supported ? ": available" : ""); + } + exit(0); +} + +static const char * const __usage[] = { + "perf mem record [<options>] [<command>]", + "perf mem record [<options>] -- <command> [<options>]", + NULL +}; + +static const char * const *record_mem_usage = __usage; + static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) { int rec_argc, i = 0, j; const char **rec_argv; int ret; + struct option options[] = { + OPT_CALLBACK('e', "event", &mem, "event", + "event selector. use 'perf mem record -e list' to list available events", + parse_record_events), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_END() + }; + + argc = parse_options(argc, argv, options, record_mem_usage, + PARSE_OPT_STOP_AT_NON_OPTION); rec_argc = argc + 7; /* max number of arguments */ rec_argv = calloc(rec_argc + 1, sizeof(char *)); @@ -35,23 +82,40 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "record"; if (mem->operation & MEM_OPERATION_LOAD) + perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; rec_argv[i++] = "-d"; - if (mem->operation & MEM_OPERATION_LOAD) { - rec_argv[i++] = "-e"; - rec_argv[i++] = "cpu/mem-loads/pp"; - } + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + if (!perf_mem_events[j].record) + continue; + + if (!perf_mem_events[j].supported) { + pr_err("failed: event '%s' not supported\n", + perf_mem_events__name(j)); + return -1; + } - if (mem->operation & MEM_OPERATION_STORE) { rec_argv[i++] = "-e"; - rec_argv[i++] = "cpu/mem-stores/pp"; - } + rec_argv[i++] = perf_mem_events__name(j); + }; - for (j = 1; j < argc; j++, i++) + for (j = 0; j < argc; j++, i++) rec_argv[i] = argv[j]; + if (verbose > 0) { + pr_debug("calling: record "); + + while (rec_argv[j]) { + pr_debug("%s ", rec_argv[j]); + j++; + } + pr_debug("\n"); + } + ret = cmd_record(i, rec_argv, NULL); free(rec_argv); return ret; @@ -298,6 +362,10 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands, mem_usage, PARSE_OPT_STOP_AT_NON_OPTION); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 132afc97676c..9af859b28b15 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -37,7 +37,7 @@ #include "util/strfilter.h" #include "util/symbol.h" #include "util/debug.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/probe-finder.h" #include "util/probe-event.h" #include "util/probe-file.h" @@ -249,6 +249,9 @@ static int opt_show_vars(const struct option *opt, return ret; } +#else +# define opt_show_lines NULL +# define opt_show_vars NULL #endif static int opt_add_probe_event(const struct option *opt, const char *str, int unset __maybe_unused) @@ -473,7 +476,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) opt_add_probe_event), OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events" " with existing name"), -#ifdef HAVE_DWARF_SUPPORT OPT_CALLBACK('L', "line", NULL, "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]", "Show source code lines.", opt_show_lines), @@ -490,7 +492,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "directory", "path to kernel source"), OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines, "Don't search inlined functions"), -#endif OPT__DRY_RUN(&probe_event_dry_run), OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes, "Set how many probe points can be found for a probe."), @@ -521,6 +522,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) #ifdef HAVE_DWARF_SUPPORT set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE); +#else +# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c) + set_nobuild('L', "line", false); + set_nobuild('V', "vars", false); + set_nobuild('\0', "externs", false); + set_nobuild('\0', "range", false); + set_nobuild('k', "vmlinux", true); + set_nobuild('s', "source", true); + set_nobuild('\0', "no-inlines", true); +# undef set_nobuild #endif set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 199fc31e3919..515510ecc76a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -11,7 +11,7 @@ #include "util/build-id.h" #include "util/util.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/callchain.h" @@ -32,6 +32,8 @@ #include "util/parse-branch-options.h" #include "util/parse-regs-options.h" #include "util/llvm-utils.h" +#include "util/bpf-loader.h" +#include "asm/bug.h" #include <unistd.h> #include <sched.h> @@ -49,7 +51,10 @@ struct record { const char *progname; int realtime_prio; bool no_buildid; + bool no_buildid_set; bool no_buildid_cache; + bool no_buildid_cache_set; + bool buildid_all; unsigned long long samples; }; @@ -319,7 +324,10 @@ try_again: } else { pr_err("failed to mmap with %d (%s)\n", errno, strerror_r(errno, msg, sizeof(msg))); - rc = -errno; + if (errno) + rc = -errno; + else + rc = -EINVAL; } goto out; } @@ -362,6 +370,13 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; + /* + * If --buildid-all is given, it marks all DSO regardless of hits, + * so no need to process samples. + */ + if (rec->buildid_all) + rec->tool.sample = NULL; + return perf_session__process_events(session); } @@ -452,6 +467,31 @@ static void record__init_features(struct record *rec) if (!rec->opts.full_auxtrace) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + + perf_header__clear_feat(&session->header, HEADER_STAT); +} + +static void +record__finish_output(struct record *rec) +{ + struct perf_data_file *file = &rec->file; + int fd = perf_data_file__fd(file); + + if (file->is_pipe) + return; + + rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); + + if (!rec->no_buildid) { + process_buildids(rec); + + if (rec->buildid_all) + dsos__hit_all(rec->session); + } + perf_session__write_header(rec->session, rec->evlist, fd, true); + + return; } static volatile int workload_exec_errno; @@ -472,6 +512,74 @@ static void workload_exec_failed_signal(int signo __maybe_unused, static void snapshot_sig_handler(int sig); +static int record__synthesize(struct record *rec) +{ + struct perf_session *session = rec->session; + struct machine *machine = &session->machines.host; + struct perf_data_file *file = &rec->file; + struct record_opts *opts = &rec->opts; + struct perf_tool *tool = &rec->tool; + int fd = perf_data_file__fd(file); + int err = 0; + + if (file->is_pipe) { + err = perf_event__synthesize_attrs(tool, session, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + goto out; + } + + if (have_tracepoints(&rec->evlist->entries)) { + /* + * FIXME err <= 0 here actually means that + * there were no tracepoints so its not really + * an error, just that we don't need to + * synthesize anything. We really have to + * return this more properly and also + * propagate errors that now are calling die() + */ + err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, + process_synthesized_event); + if (err <= 0) { + pr_err("Couldn't record tracing data.\n"); + goto out; + } + rec->bytes_written += err; + } + } + + if (rec->opts.full_auxtrace) { + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, + session, process_synthesized_event); + if (err) + goto out; + } + + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + + if (perf_guest) { + machines__process_guests(&session->machines, + perf_event__synthesize_guest_os, tool); + } + + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, + process_synthesized_event, opts->sample_address, + opts->proc_map_timeout); +out: + return err; +} + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -524,6 +632,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_child; + } + /* * Normally perf_session__new would do this, but it doesn't have the * evlist. @@ -556,63 +674,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) machine = &session->machines.host; - if (file->is_pipe) { - err = perf_event__synthesize_attrs(tool, session, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - goto out_child; - } - - if (have_tracepoints(&rec->evlist->entries)) { - /* - * FIXME err <= 0 here actually means that - * there were no tracepoints so its not really - * an error, just that we don't need to - * synthesize anything. We really have to - * return this more properly and also - * propagate errors that now are calling die() - */ - err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist, - process_synthesized_event); - if (err <= 0) { - pr_err("Couldn't record tracing data.\n"); - goto out_child; - } - rec->bytes_written += err; - } - } - - if (rec->opts.full_auxtrace) { - err = perf_event__synthesize_auxtrace_info(rec->itr, tool, - session, process_synthesized_event); - if (err) - goto out_delete_session; - } - - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - if (err < 0) - pr_err("Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); + err = record__synthesize(rec); if (err < 0) - pr_err("Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); - - if (perf_guest) { - machines__process_guests(&session->machines, - perf_event__synthesize_guest_os, tool); - } - - err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, - process_synthesized_event, opts->sample_address, - opts->proc_map_timeout); - if (err != 0) goto out_child; if (rec->realtime_prio) { @@ -748,22 +811,8 @@ out_child: /* this will be recalculated during process_buildids() */ rec->samples = 0; - if (!err && !file->is_pipe) { - rec->session->header.data_size += rec->bytes_written; - file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - - if (!rec->no_buildid) { - process_buildids(rec); - /* - * We take all buildids when the file contains - * AUX area tracing data because we do not decode the - * trace because it would take too long. - */ - if (rec->opts.full_auxtrace) - dsos__hit_all(rec->session); - } - perf_session__write_header(rec->session, rec->evlist, fd, true); - } + if (!err) + record__finish_output(rec); if (!err && !quiet) { char samples[128]; @@ -813,8 +862,12 @@ int record_parse_callchain_opt(const struct option *opt, } ret = parse_callchain_record_opt(arg, &callchain_param); - if (!ret) + if (!ret) { + /* Enable data address sampling for DWARF unwind. */ + if (callchain_param.record_mode == CALLCHAIN_DWARF) + record->sample_address = true; callchain_debug(); + } return ret; } @@ -837,6 +890,19 @@ int record_callchain_opt(const struct option *opt, static int perf_record_config(const char *var, const char *value, void *cb) { + struct record *rec = cb; + + if (!strcmp(var, "record.build-id")) { + if (!strcmp(value, "cache")) + rec->no_buildid_cache = false; + else if (!strcmp(value, "no-cache")) + rec->no_buildid_cache = true; + else if (!strcmp(value, "skip")) + rec->no_buildid = true; + else + return -1; + return 0; + } if (!strcmp(var, "record.call-graph")) var = "call-graph.record-mode"; /* fall-through */ @@ -1074,10 +1140,12 @@ struct option __record_options[] = { OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), - OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, - "do not update the buildid cache"), - OPT_BOOLEAN('B', "no-buildid", &record.no_buildid, - "do not collect buildids in perf.data"), + OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache, + &record.no_buildid_cache_set, + "do not update the buildid cache"), + OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid, + &record.no_buildid_set, + "do not collect buildids in perf.data"), OPT_CALLBACK('G', "cgroup", &record.evlist, "name", "monitor event in cgroup name only", parse_cgroups), @@ -1113,12 +1181,20 @@ struct option __record_options[] = { "per thread proc mmap processing timeout in ms"), OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, "Record context switch events"), -#ifdef HAVE_LIBBPF_SUPPORT + OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel, + "Configure all used events to run in kernel space.", + PARSE_OPT_EXCLUSIVE), + OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, + "Configure all used events to run in user space.", + PARSE_OPT_EXCLUSIVE), OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", "clang binary to use for compiling BPF scriptlets"), OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", "options passed to clang when compiling BPF scriptlets"), -#endif + OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN(0, "buildid-all", &record.buildid_all, + "Record build-id of all DSOs regardless of hits"), OPT_END() }; @@ -1130,6 +1206,27 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) struct record *rec = &record; char errbuf[BUFSIZ]; +#ifndef HAVE_LIBBPF_SUPPORT +# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c) + set_nobuild('\0', "clang-path", true); + set_nobuild('\0', "clang-opt", true); +# undef set_nobuild +#endif + +#ifndef HAVE_BPF_PROLOGUE +# if !defined (HAVE_DWARF_SUPPORT) +# define REASON "NO_DWARF=1" +# elif !defined (HAVE_LIBBPF_SUPPORT) +# define REASON "NO_LIBBPF=1" +# else +# define REASON "this architecture doesn't support BPF prologue" +# endif +# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c) + set_nobuild('\0', "vmlinux", true); +# undef set_nobuild +# undef REASON +#endif + rec->evlist = perf_evlist__new(); if (rec->evlist == NULL) return -ENOMEM; @@ -1215,6 +1312,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (err) goto out_symbol_exit; + /* + * We take all buildids when the file contains + * AUX area tracing data because we do not decode the + * trace because it would take too long. + */ + if (rec->opts.full_auxtrace) + rec->buildid_all = true; + if (record_opts__config(&rec->opts)) { err = -EINVAL; goto out_symbol_exit; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2853ad2bd435..7eea49f9ed46 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -27,7 +27,8 @@ #include "util/session.h" #include "util/tool.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> +#include <subcmd/exec-cmd.h> #include "util/parse-events.h" #include "util/thread.h" @@ -44,8 +45,7 @@ struct report { struct perf_tool tool; struct perf_session *session; - bool force, use_tui, use_gtk, use_stdio; - bool hide_unresolved; + bool use_tui, use_gtk, use_stdio; bool dont_use_callchains; bool show_full_info; bool show_threads; @@ -75,7 +75,10 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } if (!strcmp(var, "report.percent-limit")) { - rep->min_percent = strtof(value, NULL); + double pcnt = strtof(value, NULL); + + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } if (!strcmp(var, "report.children")) { @@ -87,7 +90,7 @@ static int report__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int hist_iter__report_callback(struct hist_entry_iter *iter, @@ -146,7 +149,7 @@ static int process_sample_event(struct perf_tool *tool, struct hist_entry_iter iter = { .evsel = evsel, .sample = sample, - .hide_unresolved = rep->hide_unresolved, + .hide_unresolved = symbol_conf.hide_unresolved, .add_entry_cb = hist_iter__report_callback, }; int ret = 0; @@ -157,7 +160,7 @@ static int process_sample_event(struct perf_tool *tool, return -1; } - if (rep->hide_unresolved && al.sym == NULL) + if (symbol_conf.hide_unresolved && al.sym == NULL) goto out_put; if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) @@ -434,7 +437,14 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct perf_evlist *evlist = session->evlist; - const char *help = "For a higher level overview, try: perf report --sort comm,dso"; + const char *help = perf_tip(system_path(TIPDIR)); + + if (help == NULL) { + /* fallback for people who don't install perf ;-) */ + help = perf_tip(DOCDIR); + if (help == NULL) + help = "Cannot load tips.txt file, please install perf!"; + } switch (use_browser) { case 1: @@ -459,10 +469,11 @@ static int report__browse_hists(struct report *rep) return ret; } -static void report__collapse_hists(struct report *rep) +static int report__collapse_hists(struct report *rep) { struct ui_progress prog; struct perf_evsel *pos; + int ret = 0; ui_progress__init(&prog, rep->nr_entries, "Merging related events..."); @@ -474,7 +485,9 @@ static void report__collapse_hists(struct report *rep) hists->socket_filter = rep->socket_filter; - hists__collapse_resort(hists, &prog); + ret = hists__collapse_resort(hists, &prog); + if (ret < 0) + break; /* Non-group events are considered as leader */ if (symbol_conf.event_group && @@ -487,6 +500,7 @@ static void report__collapse_hists(struct report *rep) } ui_progress__finish(); + return ret; } static void report__output_resort(struct report *rep) @@ -497,7 +511,7 @@ static void report__output_resort(struct report *rep) ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); evlist__for_each(rep->session->evlist, pos) - hists__output_resort(evsel__hists(pos), &prog); + perf_evsel__output_resort(pos, &prog); ui_progress__finish(); } @@ -514,20 +528,26 @@ static int __cmd_report(struct report *rep) if (rep->cpu_list) { ret = perf_session__cpu_bitmap(session, rep->cpu_list, rep->cpu_bitmap); - if (ret) + if (ret) { + ui__error("failed to set cpu bitmap\n"); return ret; + } } if (rep->show_threads) perf_read_values_init(&rep->show_threads_values); ret = report__setup_sample_type(rep); - if (ret) + if (ret) { + /* report__setup_sample_type() already showed error message */ return ret; + } ret = perf_session__process_events(session); - if (ret) + if (ret) { + ui__error("failed to process sample\n"); return ret; + } report__warn_kptr_restrict(rep); @@ -548,7 +568,11 @@ static int __cmd_report(struct report *rep) } } - report__collapse_hists(rep); + ret = report__collapse_hists(rep); + if (ret) { + ui__error("failed to process hist entry\n"); + return ret; + } if (session_done()) return 0; @@ -620,12 +644,14 @@ parse_percent_limit(const struct option *opt, const char *str, int unset __maybe_unused) { struct report *rep = opt->value; + double pcnt = strtof(str, NULL); - rep->min_percent = strtof(str, NULL); + rep->min_percent = pcnt; + callchain_param.min_percent = pcnt; return 0; } -#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function" +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" CALLCHAIN_REPORT_HELP @@ -678,7 +704,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('f', "force", &report.force, "don't complain, do it"), + OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples, @@ -708,7 +734,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other, "Only display entries with parent-match"), OPT_CALLBACK_DEFAULT('g', "call-graph", &report, - "print_type,threshold[,print_limit],order,sort_key[,branch]", + "print_type,threshold[,print_limit],order,sort_key[,branch],value", report_callchain_help, &report_parse_callchain_opt, callchain_default_opt), OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, @@ -740,7 +766,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), - OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved, + OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved, "Only display entries resolved to a symbol"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), @@ -783,6 +809,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Show callgraph from reference event"), OPT_INTEGER(0, "socket-filter", &report.socket_filter, "only show processor socket that match with this filter"), + OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, + "Show raw trace event output (do not use print fmt or plugins)"), + OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, + "Show entries in a hierarchy"), OPT_END() }; struct perf_data_file file = { @@ -796,6 +826,16 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) perf_config(report__config, &report); argc = parse_options(argc, argv, options, report_usage, 0); + if (argc) { + /* + * Special case: if there's an argument left then assume that + * it's a symbol filter: + */ + if (argc > 1) + usage_with_options(report_usage, options); + + report.symbol_filter_str = argv[0]; + } if (symbol_conf.vmlinux_name && access(symbol_conf.vmlinux_name, R_OK)) { @@ -832,7 +872,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) } file.path = input_name; - file.force = report.force; + file.force = symbol_conf.force; repeat: session = perf_session__new(&file, false, &report.tool); @@ -882,13 +922,19 @@ repeat: symbol_conf.cumulate_callchain = false; } - if (setup_sorting() < 0) { - if (sort_order) - parse_options_usage(report_usage, options, "s", 1); - if (field_order) - parse_options_usage(sort_order ? NULL : report_usage, - options, "F", 1); - goto error; + if (symbol_conf.report_hierarchy) { + /* disable incompatible options */ + symbol_conf.event_group = false; + symbol_conf.cumulate_callchain = false; + + if (field_order) { + pr_err("Error: --hierarchy and --fields options cannot be used together\n"); + parse_options_usage(report_usage, options, "F", 1); + parse_options_usage(NULL, options, "hierarchy", 0); + goto error; + } + + sort__need_collapse = true; } /* Force tty output for header output and per-thread stat. */ @@ -900,6 +946,15 @@ repeat: else use_browser = 0; + if (setup_sorting(session->evlist) < 0) { + if (sort_order) + parse_options_usage(report_usage, options, "s", 1); + if (field_order) + parse_options_usage(sort_order ? NULL : report_usage, + options, "F", 1); + goto error; + } + if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); @@ -941,17 +996,6 @@ repeat: if (symbol__init(&session->header.env) < 0) goto error; - if (argc) { - /* - * Special case: if there's an argument left then assume that - * it's a symbol filter: - */ - if (argc > 1) - usage_with_options(report_usage, options); - - report.symbol_filter_str = argv[0]; - } - sort__setup_elide(stdout); ret = __cmd_report(&report); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 0ee6d900e100..871b55ae22a4 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -12,7 +12,7 @@ #include "util/tool.h" #include "util/cloexec.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/trace-event.h" #include "util/debug.h" @@ -1203,12 +1203,13 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ static int pid_cmp(struct work_atoms *l, struct work_atoms *r) { + if (l->thread == r->thread) + return 0; if (l->thread->tid < r->thread->tid) return -1; if (l->thread->tid > r->thread->tid) return 1; - - return 0; + return (int)(l->thread - r->thread); } static int avg_cmp(struct work_atoms *l, struct work_atoms *r) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 72b5deb4bd79..57f9a7e7f7d3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3,9 +3,9 @@ #include "perf.h" #include "util/cache.h" #include "util/debug.h" -#include "util/exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "util/header.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/perf_regs.h" #include "util/session.h" #include "util/tool.h" @@ -18,7 +18,12 @@ #include "util/sort.h" #include "util/data.h" #include "util/auxtrace.h" +#include "util/cpumap.h" +#include "util/thread_map.h" +#include "util/stat.h" #include <linux/bitmap.h> +#include "asm/bug.h" +#include "util/mem-events.h" static char const *script_name; static char const *generate_script_lang; @@ -32,6 +37,7 @@ static bool print_flags; static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +static struct perf_stat_config stat_config; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -53,6 +59,9 @@ enum perf_output_field { PERF_OUTPUT_IREGS = 1U << 14, PERF_OUTPUT_BRSTACK = 1U << 15, PERF_OUTPUT_BRSTACKSYM = 1U << 16, + PERF_OUTPUT_DATA_SRC = 1U << 17, + PERF_OUTPUT_WEIGHT = 1U << 18, + PERF_OUTPUT_BPF_OUTPUT = 1U << 19, }; struct output_option { @@ -76,6 +85,9 @@ struct output_option { {.str = "iregs", .field = PERF_OUTPUT_IREGS}, {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, + {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC}, + {.str = "weight", .field = PERF_OUTPUT_WEIGHT}, + {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT}, }; /* default set to maintain compatibility with current format */ @@ -96,7 +108,7 @@ static struct { PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, [PERF_TYPE_SOFTWARE] = { @@ -106,7 +118,7 @@ static struct { PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -116,7 +128,7 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | - PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE, + PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE }, [PERF_TYPE_RAW] = { @@ -126,9 +138,22 @@ static struct { PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | + PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | + PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, + + [PERF_TYPE_BREAKPOINT] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, - .invalid_fields = PERF_OUTPUT_TRACE, + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, }; @@ -204,6 +229,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; bool allow_user_set; + if (perf_header__has_feat(&session->header, HEADER_STAT)) + return 0; + allow_user_set = perf_header__has_feat(&session->header, HEADER_AUXTRACE); @@ -222,6 +250,16 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_ADDR, allow_user_set)) return -EINVAL; + if (PRINT_FIELD(DATA_SRC) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", + PERF_OUTPUT_DATA_SRC)) + return -EINVAL; + + if (PRINT_FIELD(WEIGHT) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", + PERF_OUTPUT_WEIGHT)) + return -EINVAL; + if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { pr_err("Display of symbols requested but neither sample IP nor " "sample address\nis selected. Hence, no addresses to convert " @@ -588,8 +626,130 @@ static void print_sample_flags(u32 flags) printf(" %-4s ", str); } -static void process_event(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel, struct addr_location *al) +struct printer_data { + int line_no; + bool hit_nul; + bool is_printable; +}; + +static void +print_sample_bpf_output_printer(enum binary_printer_ops op, + unsigned int val, + void *extra) +{ + unsigned char ch = (unsigned char)val; + struct printer_data *printer_data = extra; + + switch (op) { + case BINARY_PRINT_DATA_BEGIN: + printf("\n"); + break; + case BINARY_PRINT_LINE_BEGIN: + printf("%17s", !printer_data->line_no ? "BPF output:" : + " "); + break; + case BINARY_PRINT_ADDR: + printf(" %04x:", val); + break; + case BINARY_PRINT_NUM_DATA: + printf(" %02x", val); + break; + case BINARY_PRINT_NUM_PAD: + printf(" "); + break; + case BINARY_PRINT_SEP: + printf(" "); + break; + case BINARY_PRINT_CHAR_DATA: + if (printer_data->hit_nul && ch) + printer_data->is_printable = false; + + if (!isprint(ch)) { + printf("%c", '.'); + + if (!printer_data->is_printable) + break; + + if (ch == '\0') + printer_data->hit_nul = true; + else + printer_data->is_printable = false; + } else { + printf("%c", ch); + } + break; + case BINARY_PRINT_CHAR_PAD: + printf(" "); + break; + case BINARY_PRINT_LINE_END: + printf("\n"); + printer_data->line_no++; + break; + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void print_sample_bpf_output(struct perf_sample *sample) +{ + unsigned int nr_bytes = sample->raw_size; + struct printer_data printer_data = {0, false, true}; + + print_binary(sample->raw_data, nr_bytes, 8, + print_sample_bpf_output_printer, &printer_data); + + if (printer_data.is_printable && printer_data.hit_nul) + printf("%17s \"%s\"\n", "BPF string:", + (char *)(sample->raw_data)); +} + +struct perf_script { + struct perf_tool tool; + struct perf_session *session; + bool show_task_events; + bool show_mmap_events; + bool show_switch_events; + bool allocated; + struct cpu_map *cpus; + struct thread_map *threads; + int name_width; +}; + +static int perf_evlist__max_name_len(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + int max = 0; + + evlist__for_each(evlist, evsel) { + int len = strlen(perf_evsel__name(evsel)); + + max = MAX(len, max); + } + + return max; +} + +static size_t data_src__printf(u64 data_src) +{ + struct mem_info mi = { .data_src.val = data_src }; + char decode[100]; + char out[100]; + static int maxlen; + int len; + + perf_script__meminfo_scnprintf(decode, 100, &mi); + + len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode); + if (maxlen < len) + maxlen = len; + + return printf("%-*s", maxlen, out); +} + +static void process_event(struct perf_script *script, union perf_event *event, + struct perf_sample *sample, struct perf_evsel *evsel, + struct addr_location *al) { struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; @@ -604,7 +764,12 @@ static void process_event(union perf_event *event, struct perf_sample *sample, if (PRINT_FIELD(EVNAME)) { const char *evname = perf_evsel__name(evsel); - printf("%s: ", evname ? evname : "[unknown]"); + + if (!script->name_width) + script->name_width = perf_evlist__max_name_len(script->session->evlist); + + printf("%*s: ", script->name_width, + evname ? evname : "[unknown]"); } if (print_flags) @@ -621,6 +786,12 @@ static void process_event(union perf_event *event, struct perf_sample *sample, if (PRINT_FIELD(ADDR)) print_sample_addr(event, sample, thread, attr); + if (PRINT_FIELD(DATA_SRC)) + data_src__printf(sample->data_src); + + if (PRINT_FIELD(WEIGHT)) + printf("%16" PRIu64, sample->weight); + if (PRINT_FIELD(IP)) { if (!symbol_conf.use_callchain) printf(" "); @@ -640,68 +811,87 @@ static void process_event(union perf_event *event, struct perf_sample *sample, else if (PRINT_FIELD(BRSTACKSYM)) print_sample_brstacksym(event, sample, thread, attr); + if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) + print_sample_bpf_output(sample); + printf("\n"); } -static int default_start_script(const char *script __maybe_unused, - int argc __maybe_unused, - const char **argv __maybe_unused) -{ - return 0; -} +static struct scripting_ops *scripting_ops; -static int default_flush_script(void) +static void __process_stat(struct perf_evsel *counter, u64 tstamp) { - return 0; + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; + static int header_printed; + + if (counter->system_wide) + nthreads = 1; + + if (!header_printed) { + printf("%3s %8s %15s %15s %15s %15s %s\n", + "CPU", "THREAD", "VAL", "ENA", "RUN", "TIME", "EVENT"); + header_printed = 1; + } + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + struct perf_counts_values *counts; + + counts = perf_counts(counter->counts, cpu, thread); + + printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n", + counter->cpus->map[cpu], + thread_map__pid(counter->threads, thread), + counts->val, + counts->ena, + counts->run, + tstamp, + perf_evsel__name(counter)); + } + } } -static int default_stop_script(void) +static void process_stat(struct perf_evsel *counter, u64 tstamp) { - return 0; + if (scripting_ops && scripting_ops->process_stat) + scripting_ops->process_stat(&stat_config, counter, tstamp); + else + __process_stat(counter, tstamp); } -static int default_generate_script(struct pevent *pevent __maybe_unused, - const char *outfile __maybe_unused) +static void process_stat_interval(u64 tstamp) { - return 0; + if (scripting_ops && scripting_ops->process_stat_interval) + scripting_ops->process_stat_interval(tstamp); } -static struct scripting_ops default_scripting_ops = { - .start_script = default_start_script, - .flush_script = default_flush_script, - .stop_script = default_stop_script, - .process_event = process_event, - .generate_script = default_generate_script, -}; - -static struct scripting_ops *scripting_ops; - static void setup_scripting(void) { setup_perl_scripting(); setup_python_scripting(); - - scripting_ops = &default_scripting_ops; } static int flush_scripting(void) { - return scripting_ops->flush_script(); + return scripting_ops ? scripting_ops->flush_script() : 0; } static int cleanup_scripting(void) { pr_debug("\nperf script stopped\n"); - return scripting_ops->stop_script(); + return scripting_ops ? scripting_ops->stop_script() : 0; } -static int process_sample_event(struct perf_tool *tool __maybe_unused, +static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, struct machine *machine) { + struct perf_script *scr = container_of(tool, struct perf_script, tool); struct addr_location al; if (debug_mode) { @@ -727,20 +917,16 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) goto out_put; - scripting_ops->process_event(event, sample, evsel, &al); + if (scripting_ops) + scripting_ops->process_event(event, sample, evsel, &al); + else + process_event(scr, event, sample, evsel, &al); + out_put: addr_location__put(&al); return 0; } -struct perf_script { - struct perf_tool tool; - struct perf_session *session; - bool show_task_events; - bool show_mmap_events; - bool show_switch_events; -}; - static int process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist) { @@ -1026,23 +1212,6 @@ static struct script_spec *script_spec__find(const char *spec) return NULL; } -static struct script_spec *script_spec__findnew(const char *spec, - struct scripting_ops *ops) -{ - struct script_spec *s = script_spec__find(spec); - - if (s) - return s; - - s = script_spec__new(spec, ops); - if (!s) - return NULL; - - script_spec__add(s); - - return s; -} - int script_spec_register(const char *spec, struct scripting_ops *ops) { struct script_spec *s; @@ -1051,9 +1220,11 @@ int script_spec_register(const char *spec, struct scripting_ops *ops) if (s) return -1; - s = script_spec__findnew(spec, ops); + s = script_spec__new(spec, ops); if (!s) return -1; + else + script_spec__add(s); return 0; } @@ -1156,6 +1327,8 @@ static int parse_output_fields(const struct option *opt __maybe_unused, type = PERF_TYPE_TRACEPOINT; else if (!strcmp(str, "raw")) type = PERF_TYPE_RAW; + else if (!strcmp(str, "break")) + type = PERF_TYPE_BREAKPOINT; else { fprintf(stderr, "Invalid event type in field string.\n"); rc = -EINVAL; @@ -1421,7 +1594,7 @@ static int list_available_scripts(const struct option *opt __maybe_unused, char first_half[BUFSIZ]; char *script_root; - snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); + snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); scripts_dir = opendir(scripts_path); if (!scripts_dir) @@ -1542,7 +1715,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array) if (!session) return -1; - snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); + snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); scripts_dir = opendir(scripts_path); if (!scripts_dir) { @@ -1600,7 +1773,7 @@ static char *get_script_path(const char *script_root, const char *suffix) char lang_path[MAXPATHLEN]; char *__script_root; - snprintf(scripts_path, MAXPATHLEN, "%s/scripts", perf_exec_path()); + snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); scripts_dir = opendir(scripts_path); if (!scripts_dir) @@ -1695,6 +1868,87 @@ static void script__setup_sample_type(struct perf_script *script) } } +static int process_stat_round_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session) +{ + struct stat_round_event *round = &event->stat_round; + struct perf_evsel *counter; + + evlist__for_each(session->evlist, counter) { + perf_stat_process_counter(&stat_config, counter); + process_stat(counter, round->time); + } + + process_stat_interval(round->time); + return 0; +} + +static int process_stat_config_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + perf_event__read_stat_config(&stat_config, &event->stat_config); + return 0; +} + +static int set_maps(struct perf_script *script) +{ + struct perf_evlist *evlist = script->session->evlist; + + if (!script->cpus || !script->threads) + return 0; + + if (WARN_ONCE(script->allocated, "stats double allocation\n")) + return -EINVAL; + + perf_evlist__set_maps(evlist, script->cpus, script->threads); + + if (perf_evlist__alloc_stats(evlist, true)) + return -ENOMEM; + + script->allocated = true; + return 0; +} + +static +int process_thread_map_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct perf_script *script = container_of(tool, struct perf_script, tool); + + if (script->threads) { + pr_warning("Extra thread map event, ignoring.\n"); + return 0; + } + + script->threads = thread_map__new_event(&event->thread_map); + if (!script->threads) + return -ENOMEM; + + return set_maps(script); +} + +static +int process_cpu_map_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct perf_script *script = container_of(tool, struct perf_script, tool); + + if (script->cpus) { + pr_warning("Extra cpu map event, ignoring.\n"); + return 0; + } + + script->cpus = cpu_map__new_data(&event->cpu_map.data); + if (!script->cpus) + return -ENOMEM; + + return set_maps(script); +} + int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) { bool show_full_info = false; @@ -1723,6 +1977,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, .auxtrace_error = perf_event__process_auxtrace_error, + .stat = perf_event__process_stat_event, + .stat_round = process_stat_round_event, + .stat_config = process_stat_config_event, + .thread_map = process_thread_map_event, + .cpu_map = process_cpu_map_event, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -1836,7 +2095,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) scripting_max_stack = itrace_synth_opts.callchain_sz; /* make sure PERF_EXEC_PATH is set for scripts */ - perf_set_argv_exec_path(perf_exec_path()); + set_argv_exec_path(get_argv_exec_path()); if (argc && !script_name && !rec_script_path && !rep_script_path) { int live_pipe[2]; @@ -2076,6 +2335,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) flush_scripting(); out_delete: + perf_evlist__free_stats(session->evlist); perf_session__delete(session); if (script_started) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2f438f76cceb..1f19f2f999c8 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -45,7 +45,7 @@ #include "builtin.h" #include "util/cgroup.h" #include "util/util.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/pmu.h" #include "util/event.h" @@ -59,6 +59,9 @@ #include "util/thread.h" #include "util/thread_map.h" #include "util/counts.h" +#include "util/session.h" +#include "util/tool.h" +#include "asm/bug.h" #include <stdlib.h> #include <sys/prctl.h> @@ -119,9 +122,28 @@ static bool sync_run = false; static unsigned int initial_delay = 0; static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; +static bool metric_only = false; static struct timespec ref_time; static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; +static bool append_file; +static const char *output_name; +static int output_fd; + +struct perf_stat { + bool record; + struct perf_data_file file; + struct perf_session *session; + u64 bytes_written; + struct perf_tool tool; + bool maps_allocated; + struct cpu_map *cpus; + struct thread_map *threads; + enum aggr_mode aggr_mode; +}; + +static struct perf_stat perf_stat; +#define STAT_RECORD perf_stat.record static volatile int done = 0; @@ -158,15 +180,43 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->inherit = !no_inherit; - if (target__has_cpu(&target)) - return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); + /* + * Some events get initialized with sample_(period/type) set, + * like tracepoints. Clear it up for counting. + */ + attr->sample_period = 0; - if (!target__has_task(&target) && perf_evsel__is_group_leader(evsel)) { + /* + * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless + * while avoiding that older tools show confusing messages. + * + * However for pipe sessions we need to keep it zero, + * because script's perf_evsel__check_attr is triggered + * by attr->sample_type != 0, and we can't run it on + * stat sessions. + */ + if (!(STAT_RECORD && perf_stat.file.is_pipe)) + attr->sample_type = PERF_SAMPLE_IDENTIFIER; + + /* + * Disabling all counters initially, they will be enabled + * either manually by us or by kernel via enable_on_exec + * set later. + */ + if (perf_evsel__is_group_leader(evsel)) { attr->disabled = 1; - if (!initial_delay) + + /* + * In case of initial_delay we enable tracee + * events manually. + */ + if (target__none(&target) && !initial_delay) attr->enable_on_exec = 1; } + if (target__has_cpu(&target)) + return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); + return perf_evsel__open_per_thread(evsel, evsel_list->threads); } @@ -182,6 +232,42 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } +static int process_synthesized_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + if (perf_data_file__write(&perf_stat.file, event, event->header.size) < 0) { + pr_err("failed to write perf data, error: %m\n"); + return -1; + } + + perf_stat.bytes_written += event->header.size; + return 0; +} + +static int write_stat_round_event(u64 tm, u64 type) +{ + return perf_event__synthesize_stat_round(NULL, tm, type, + process_synthesized_event, + NULL); +} + +#define WRITE_STAT_ROUND_EVENT(time, interval) \ + write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval) + +#define SID(e, x, y) xyarray__entry(e->sample_id, x, y) + +static int +perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread, + struct perf_counts_values *count) +{ + struct perf_sample_id *sid = SID(counter, cpu, thread); + + return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count, + process_synthesized_event, NULL); +} + /* * Read out the results of a single counter: * do not aggregate counts across CPUs in system-wide mode @@ -205,6 +291,13 @@ static int read_counter(struct perf_evsel *counter) count = perf_counts(counter->counts, cpu, thread); if (perf_evsel__read(counter, cpu, thread, count)) return -1; + + if (STAT_RECORD) { + if (perf_evsel__write_stat_event(counter, cpu, thread, count)) { + pr_err("failed to write stat event\n"); + return -1; + } + } } } @@ -238,21 +331,26 @@ static void process_interval(void) clock_gettime(CLOCK_MONOTONIC, &ts); diff_timespec(&rs, &ts, &ref_time); + if (STAT_RECORD) { + if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSECS_PER_SEC + rs.tv_nsec, INTERVAL)) + pr_err("failed to write stat round event\n"); + } + print_counters(&rs, 0, NULL); } -static void handle_initial_delay(void) +static void enable_counters(void) { - struct perf_evsel *counter; - - if (initial_delay) { - const int ncpus = cpu_map__nr(evsel_list->cpus), - nthreads = thread_map__nr(evsel_list->threads); - + if (initial_delay) usleep(initial_delay * 1000); - evlist__for_each(evsel_list, counter) - perf_evsel__enable(counter, ncpus, nthreads); - } + + /* + * We need to enable counters only if: + * - we don't have tracee (attaching to task or cpu) + * - we have initial delay configured + */ + if (!target__none(&target) || initial_delay) + perf_evlist__enable(evsel_list); } static volatile int workload_exec_errno; @@ -268,6 +366,135 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } +static bool has_unit(struct perf_evsel *counter) +{ + return counter->unit && *counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ + return counter->scale != 1; +} + +static int perf_stat_synthesize_config(bool is_pipe) +{ + struct perf_evsel *counter; + int err; + + if (is_pipe) { + err = perf_event__synthesize_attrs(NULL, perf_stat.session, + process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + } + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each(evsel_list, counter) { + if (!counter->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(counter)) { + err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(counter)) { + err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize evsel scale.\n"); + return err; + } + } + + if (counter->own_cpus) { + err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize evsel scale.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + + err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, + process_synthesized_event, + NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, + process_synthesized_event, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_stat_config(NULL, &stat_config, + process_synthesized_event, NULL); + if (err < 0) { + pr_err("Couldn't synthesize config.\n"); + return err; + } + + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) + +static int __store_counter_ids(struct perf_evsel *counter, + struct cpu_map *cpus, + struct thread_map *threads) +{ + int cpu, thread; + + for (cpu = 0; cpu < cpus->nr; cpu++) { + for (thread = 0; thread < threads->nr; thread++) { + int fd = FD(counter, cpu, thread); + + if (perf_evlist__id_add_fd(evsel_list, counter, + cpu, thread, fd) < 0) + return -1; + } + } + + return 0; +} + +static int store_counter_ids(struct perf_evsel *counter) +{ + struct cpu_map *cpus = counter->cpus; + struct thread_map *threads = counter->threads; + + if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) + return -ENOMEM; + + return __store_counter_ids(counter, cpus, threads); +} + static int __run_perf_stat(int argc, const char **argv) { int interval = stat_config.interval; @@ -278,6 +505,7 @@ static int __run_perf_stat(int argc, const char **argv) size_t l; int status = 0; const bool forks = (argc > 0); + bool is_pipe = STAT_RECORD ? perf_stat.file.is_pipe : false; if (interval) { ts.tv_sec = interval / 1000; @@ -288,7 +516,7 @@ static int __run_perf_stat(int argc, const char **argv) } if (forks) { - if (perf_evlist__prepare_workload(evsel_list, &target, argv, false, + if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) { perror("failed to prepare workload"); return -1; @@ -332,6 +560,9 @@ static int __run_perf_stat(int argc, const char **argv) l = strlen(counter->unit); if (l > unit_width) unit_width = l; + + if (STAT_RECORD && store_counter_ids(counter)) + return -1; } if (perf_evlist__apply_filters(evsel_list, &counter)) { @@ -341,6 +572,24 @@ static int __run_perf_stat(int argc, const char **argv) return -1; } + if (STAT_RECORD) { + int err, fd = perf_data_file__fd(&perf_stat.file); + + if (is_pipe) { + err = perf_header__write_pipe(perf_data_file__fd(&perf_stat.file)); + } else { + err = perf_session__write_header(perf_stat.session, evsel_list, + fd, false); + } + + if (err < 0) + return err; + + err = perf_stat_synthesize_config(is_pipe); + if (err < 0) + return err; + } + /* * Enable counters and exec the command: */ @@ -349,7 +598,7 @@ static int __run_perf_stat(int argc, const char **argv) if (forks) { perf_evlist__start_workload(evsel_list); - handle_initial_delay(); + enable_counters(); if (interval) { while (!waitpid(child_pid, &status, WNOHANG)) { @@ -368,7 +617,7 @@ static int __run_perf_stat(int argc, const char **argv) if (WIFSIGNALED(status)) psignal(WTERMSIG(status), argv[0]); } else { - handle_initial_delay(); + enable_counters(); while (!done) { nanosleep(&ts, NULL); if (interval) @@ -487,6 +736,191 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) } } +struct outstate { + FILE *fh; + bool newline; + const char *prefix; + int nfields; + int id, nr; + struct perf_evsel *evsel; +}; + +#define METRIC_LEN 35 + +static void new_line_std(void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct outstate *os) +{ + fputc('\n', os->fh); + fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); + if (stat_config.aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + +static void new_line_csv(void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(csv_sep, os->fh); +} + +static void print_metric_csv(void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s%s%s", csv_sep, csv_sep, csv_sep, csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); +} + +#define METRIC_ONLY_LEN 20 + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + char buf[1024]; + unsigned mlen = METRIC_ONLY_LEN; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (color) + n = color_fprintf(out, color, fmt, val); + else + n = fprintf(out, fmt, val); + if (n > METRIC_ONLY_LEN) + n = METRIC_ONLY_LEN; + if (mlen < strlen(unit)) + mlen = strlen(unit) + 1; + fprintf(out, "%*s", mlen - n, ""); +} + +static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + vals = buf; + while (isspace(*vals)) + vals++; + ends = vals; + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, csv_sep); +} + +static void new_line_metric(void *ctx __maybe_unused) +{ +} + +static void print_metric_header(void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (csv_output) + fprintf(os->fh, "%s%s", unit, csv_sep); + else + fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); +} + static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -513,15 +947,28 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); +} - if (csv_output || stat_config.interval) - return; +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; - if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) - fprintf(output, " # %8.3f CPUs utilized ", - avg / avg_stats(&walltime_nsecs_stats)); - else - fprintf(output, " "); + if (!aggr_get_id) + return 0; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; } static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) @@ -529,7 +976,6 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) FILE *output = stat_config.output; double sc = evsel->scale; const char *fmt; - int cpu = cpu_map__id_to_cpu(id); if (csv_output) { fmt = sc != 1.0 ? "%.2f%s" : "%.0f%s"; @@ -542,9 +988,6 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) aggr_printout(evsel, id, nr); - if (stat_config.aggr_mode == AGGR_GLOBAL) - cpu = 0; - fprintf(output, fmt, avg, csv_sep); if (evsel->unit) @@ -556,12 +999,126 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (evsel->cgrp) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); +} + +static void printout(int id, int nr, struct perf_evsel *counter, double uval, + char *prefix, u64 run, u64 ena, double noise) +{ + struct perf_stat_output_ctx out; + struct outstate os = { + .fh = stat_config.output, + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, + }; + print_metric_t pm = print_metric_std; + void (*nl)(void *); + + if (metric_only) { + nl = new_line_metric; + if (csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; + + if (csv_output && !metric_only) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[stat_config.aggr_mode]; + if (counter->cgrp) + os.nfields++; + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (metric_only) { + pm(&os, NULL, "", "", 0); + return; + } + aggr_printout(counter, id, nr); + + fprintf(stat_config.output, "%*s%s", + csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + csv_sep); + + fprintf(stat_config.output, "%-*s%s", + csv_output ? 0 : unit_width, + counter->unit, csv_sep); + + fprintf(stat_config.output, "%*s", + csv_output ? 0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(stat_config.output, "%s%s", + csv_sep, counter->cgrp->name); - if (csv_output || stat_config.interval) + if (!csv_output) + pm(&os, NULL, NULL, "", 0); + print_noise(counter, noise); + print_running(run, ena); + if (csv_output) + pm(&os, NULL, NULL, "", 0); return; + } + + if (metric_only) + /* nothing */; + else if (nsec_counter(counter)) + nsec_printout(id, nr, counter, uval); + else + abs_printout(id, nr, counter, uval); - perf_stat__print_shadow_stats(output, evsel, avg, cpu, - stat_config.aggr_mode); + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + + if (csv_output && !metric_only) { + print_noise(counter, noise); + print_running(run, ena); + } + + perf_stat__print_shadow_stats(counter, uval, + first_shadow_cpu(counter, id), + &out); + if (!csv_output && !metric_only) { + print_noise(counter, noise); + print_running(run, ena); + } +} + +static void aggr_update_shadow(void) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < aggr_map->nr; s++) { + id = aggr_map->map[s]; + evlist__for_each(evsel_list, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = aggr_get_id(evsel_list->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + val = val * counter->scale; + perf_stat__update_shadow_stats(counter, &val, + first_shadow_cpu(counter, id)); + } + } } static void print_aggr(char *prefix) @@ -571,12 +1128,23 @@ static void print_aggr(char *prefix) int cpu, s, s2, id, nr; double uval; u64 ena, run, val; + bool first; if (!(aggr_map || aggr_get_id)) return; + aggr_update_shadow(); + + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. + */ for (s = 0; s < aggr_map->nr; s++) { + if (prefix && metric_only) + fprintf(output, "%s", prefix); + id = aggr_map->map[s]; + first = true; evlist__for_each(evsel_list, counter) { val = ena = run = 0; nr = 0; @@ -589,46 +1157,20 @@ static void print_aggr(char *prefix) run += perf_counts(counter->counts, cpu, 0)->run; nr++; } - if (prefix) - fprintf(output, "%s", prefix); - - if (run == 0 || ena == 0) { + if (first && metric_only) { + first = false; aggr_printout(counter, id, nr); - - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); - - print_running(run, ena); - fputc('\n', output); - continue; } - uval = val * counter->scale; - - if (nsec_counter(counter)) - nsec_printout(id, nr, counter, uval); - else - abs_printout(id, nr, counter, uval); - - if (!csv_output) - print_noise(counter, 1.0); + if (prefix && !metric_only) + fprintf(output, "%s", prefix); - print_running(run, ena); - fputc('\n', output); + uval = val * counter->scale; + printout(id, nr, counter, uval, prefix, run, ena, 1.0); + if (!metric_only) + fputc('\n', output); } + if (metric_only) + fputc('\n', output); } } @@ -653,16 +1195,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - - if (nsec_counter(counter)) - nsec_printout(thread, 0, counter, uval); - else - abs_printout(thread, 0, counter, uval); - - if (!csv_output) - print_noise(counter, 1.0); - - print_running(run, ena); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0); fputc('\n', output); } } @@ -676,47 +1209,19 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) FILE *output = stat_config.output; struct perf_stat_evsel *ps = counter->priv; double avg = avg_stats(&ps->res_stats[0]); - int scaled = counter->counts->scaled; double uval; double avg_enabled, avg_running; avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - if (prefix) + if (prefix && !metric_only) fprintf(output, "%s", prefix); - if (scaled == -1 || !counter->supported) { - fprintf(output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(output, "%s%s", csv_sep, counter->cgrp->name); - - print_running(avg_running, avg_enabled); - fputc('\n', output); - return; - } - uval = avg * counter->scale; - - if (nsec_counter(counter)) - nsec_printout(-1, 0, counter, uval); - else - abs_printout(-1, 0, counter, uval); - - print_noise(counter, avg); - - print_running(avg_running, avg_enabled); - fprintf(output, "\n"); + printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg); + if (!metric_only) + fprintf(output, "\n"); } /* @@ -738,44 +1243,78 @@ static void print_counter(struct perf_evsel *counter, char *prefix) if (prefix) fprintf(output, "%s", prefix); - if (run == 0 || ena == 0) { - fprintf(output, "CPU%*d%s%*s%s", - csv_output ? 0 : -4, - perf_evsel__cpus(counter)->map[cpu], csv_sep, - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); + uval = val * counter->scale; + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); + fputc('\n', output); + } +} - fprintf(output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); +static void print_no_aggr_metric(char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; - if (counter->cgrp) - fprintf(output, "%s%s", - csv_sep, counter->cgrp->name); + nrcpus = evsel_list->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; - print_running(run, ena); - fputc('\n', output); - continue; + if (prefix) + fputs(prefix, stat_config.output); + evlist__for_each(evsel_list, counter) { + if (first) { + aggr_printout(counter, cpu, 0); + first = false; + } + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); } + fputc('\n', stat_config.output); + } +} - uval = val * counter->scale; +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; - if (nsec_counter(counter)) - nsec_printout(cpu, 0, counter, uval); - else - abs_printout(cpu, 0, counter, uval); +static void print_metric_headers(char *prefix) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = stat_config.output + }; - if (!csv_output) - print_noise(counter, 1.0); - print_running(run, ena); + if (prefix) + fprintf(stat_config.output, "%s", prefix); - fputc('\n', output); + if (!csv_output) + fprintf(stat_config.output, "%*s", + aggr_header_lens[stat_config.aggr_mode], ""); + + /* Print metrics headers only */ + evlist__for_each(evsel_list, counter) { + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + os.evsel = counter; + perf_stat__print_shadow_stats(counter, 0, + 0, + &out); } + fputc('\n', stat_config.output); } static void print_interval(char *prefix, struct timespec *ts) @@ -785,7 +1324,7 @@ static void print_interval(char *prefix, struct timespec *ts) sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { + if (num_print_interval == 0 && !csv_output && !metric_only) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); @@ -826,8 +1365,8 @@ static void print_header(int argc, const char **argv) else if (target.cpu_list) fprintf(output, "\'CPU(s) %s", target.cpu_list); else if (!target__has_task(&target)) { - fprintf(output, "\'%s", argv[0]); - for (i = 1; i < argc; i++) + fprintf(output, "\'%s", argv ? argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) fprintf(output, " %s", argv[i]); } else if (target.pid) fprintf(output, "process id \'%s", target.pid); @@ -863,11 +1402,26 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) struct perf_evsel *counter; char buf[64], *prefix = NULL; + /* Do not print anything if we record to the pipe. */ + if (STAT_RECORD && perf_stat.file.is_pipe) + return; + if (interval) print_interval(prefix = buf, ts); else print_header(argc, argv); + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0) + print_metric_headers(prefix); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) + fprintf(stat_config.output, "%s", prefix); + } + switch (stat_config.aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: @@ -880,10 +1434,16 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) print_counter_aggr(counter, prefix); + if (metric_only) + fputc('\n', stat_config.output); break; case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); + if (metric_only) + print_no_aggr_metric(prefix); + else { + evlist__for_each(evsel_list, counter) + print_counter(counter, prefix); + } break; case AGGR_UNSET: default: @@ -946,6 +1506,69 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, return 0; } +static const struct option stat_options[] = { + OPT_BOOLEAN('T', "transaction", &transaction_run, + "hardware transaction statistics"), + OPT_CALLBACK('e', "event", &evsel_list, "event", + "event selector. use 'perf list' to list available events", + parse_events_option), + OPT_CALLBACK(0, "filter", &evsel_list, "filter", + "event filter", parse_filter), + OPT_BOOLEAN('i', "no-inherit", &no_inherit, + "child tasks do not inherit counters"), + OPT_STRING('p', "pid", &target.pid, "pid", + "stat events on existing process id"), + OPT_STRING('t', "tid", &target.tid, "tid", + "stat events on existing thread id"), + OPT_BOOLEAN('a', "all-cpus", &target.system_wide, + "system-wide collection from all CPUs"), + OPT_BOOLEAN('g', "group", &group, + "put the counters into a counter group"), + OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_INTEGER('r', "repeat", &run_count, + "repeat command and print average + stddev (max: 100, forever: 0)"), + OPT_BOOLEAN('n', "null", &null_run, + "null run - dont start any counters"), + OPT_INCR('d', "detailed", &detailed_run, + "detailed run - start a lot of events"), + OPT_BOOLEAN('S', "sync", &sync_run, + "call sync() before starting a run"), + OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, + "print large numbers with thousands\' separators", + stat__set_big_num), + OPT_STRING('C', "cpu", &target.cpu_list, "cpu", + "list of cpus to monitor in system-wide"), + OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, + "disable CPU count aggregation", AGGR_NONE), + OPT_STRING('x', "field-separator", &csv_sep, "separator", + "print counts with custom separator"), + OPT_CALLBACK('G', "cgroup", &evsel_list, "name", + "monitor event in cgroup name only", parse_cgroups), + OPT_STRING('o', "output", &output_name, "file", "output file name"), + OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), + OPT_INTEGER(0, "log-fd", &output_fd, + "log output to fd, instead of stderr"), + OPT_STRING(0, "pre", &pre_cmd, "command", + "command to run prior to the measured command"), + OPT_STRING(0, "post", &post_cmd, "command", + "command to run after to the measured command"), + OPT_UINTEGER('I', "interval-print", &stat_config.interval, + "print counts at regular interval in ms (>= 10)"), + OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, + "aggregate counts per processor socket", AGGR_SOCKET), + OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, + "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, + "aggregate counts per thread", AGGR_THREAD), + OPT_UINTEGER('D', "delay", &initial_delay, + "ms to wait before starting measurement after program start"), + OPT_BOOLEAN(0, "metric-only", &metric_only, + "Only print computed metrics. No raw values"), + OPT_END() +}; + static int perf_stat__get_socket(struct cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); @@ -1032,13 +1655,116 @@ static int perf_stat_init_aggr_mode(void) return cpus_aggr_map ? 0 : -ENOMEM; } +static void perf_stat__exit_aggr_mode(void) +{ + cpu_map__put(aggr_map); + cpu_map__put(cpus_aggr_map); + aggr_map = NULL; + cpus_aggr_map = NULL; +} + +static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) +{ + int cpu; + + if (idx > map->nr) + return -1; + + cpu = map->map[idx]; + + if (cpu >= env->nr_cpus_online) + return -1; + + return cpu; +} + +static int perf_env__get_socket(struct cpu_map *map, int idx, void *data) +{ + struct perf_env *env = data; + int cpu = perf_env__get_cpu(env, map, idx); + + return cpu == -1 ? -1 : env->cpu[cpu].socket_id; +} + +static int perf_env__get_core(struct cpu_map *map, int idx, void *data) +{ + struct perf_env *env = data; + int core = -1, cpu = perf_env__get_cpu(env, map, idx); + + if (cpu != -1) { + int socket_id = env->cpu[cpu].socket_id; + + /* + * Encode socket in upper 16 bits + * core_id is relative to socket, and + * we need a global id. So we combine + * socket + core id. + */ + core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); + } + + return core; +} + +static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus, + struct cpu_map **sockp) +{ + return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); +} + +static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, + struct cpu_map **corep) +{ + return cpu_map__build_map(cpus, corep, perf_env__get_core, env); +} + +static int perf_stat__get_socket_file(struct cpu_map *map, int idx) +{ + return perf_env__get_socket(map, idx, &perf_stat.session->header.env); +} + +static int perf_stat__get_core_file(struct cpu_map *map, int idx) +{ + return perf_env__get_core(map, idx, &perf_stat.session->header.env); +} + +static int perf_stat_init_aggr_mode_file(struct perf_stat *st) +{ + struct perf_env *env = &st->session->header.env; + + switch (stat_config.aggr_mode) { + case AGGR_SOCKET: + if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { + perror("cannot build socket map"); + return -1; + } + aggr_get_id = perf_stat__get_socket_file; + break; + case AGGR_CORE: + if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { + perror("cannot build core map"); + return -1; + } + aggr_get_id = perf_stat__get_core_file; + break; + case AGGR_NONE: + case AGGR_GLOBAL: + case AGGR_THREAD: + case AGGR_UNSET: + default: + break; + } + + return 0; +} + /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ static int add_default_attributes(void) { - struct perf_event_attr default_attrs[] = { + struct perf_event_attr default_attrs0[] = { { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, @@ -1046,8 +1772,14 @@ static int add_default_attributes(void) { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, +}; + struct perf_event_attr frontend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, +}; + struct perf_event_attr backend_attrs[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, +}; + struct perf_event_attr default_attrs1[] = { { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, @@ -1164,7 +1896,19 @@ static int add_default_attributes(void) } if (!evsel_list->nr_entries) { - if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0) + return -1; + if (pmu_have_event("cpu", "stalled-cycles-frontend")) { + if (perf_evlist__add_default_attrs(evsel_list, + frontend_attrs) < 0) + return -1; + } + if (pmu_have_event("cpu", "stalled-cycles-backend")) { + if (perf_evlist__add_default_attrs(evsel_list, + backend_attrs) < 0) + return -1; + } + if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0) return -1; } @@ -1191,71 +1935,227 @@ static int add_default_attributes(void) return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } -int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) +static const char * const stat_record_usage[] = { + "perf stat record [<options>]", + NULL, +}; + +static void init_features(struct perf_session *session) +{ + int feat; + + for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) + perf_header__set_feat(&session->header, feat); + + perf_header__clear_feat(&session->header, HEADER_BUILD_ID); + perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); + perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + perf_header__clear_feat(&session->header, HEADER_AUXTRACE); +} + +static int __cmd_record(int argc, const char **argv) +{ + struct perf_session *session; + struct perf_data_file *file = &perf_stat.file; + + argc = parse_options(argc, argv, stat_options, stat_record_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (output_name) + file->path = output_name; + + if (run_count != 1 || forever) { + pr_err("Cannot use -r option with perf stat record.\n"); + return -1; + } + + session = perf_session__new(file, false, NULL); + if (session == NULL) { + pr_err("Perf session creation failed.\n"); + return -1; + } + + init_features(session); + + session->evlist = evsel_list; + perf_stat.session = session; + perf_stat.record = true; + return argc; +} + +static int process_stat_round_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session) +{ + struct stat_round_event *round = &event->stat_round; + struct perf_evsel *counter; + struct timespec tsh, *ts = NULL; + const char **argv = session->header.env.cmdline_argv; + int argc = session->header.env.nr_cmdline; + + evlist__for_each(evsel_list, counter) + perf_stat_process_counter(&stat_config, counter); + + if (round->type == PERF_STAT_ROUND_TYPE__FINAL) + update_stats(&walltime_nsecs_stats, round->time); + + if (stat_config.interval && round->time) { + tsh.tv_sec = round->time / NSECS_PER_SEC; + tsh.tv_nsec = round->time % NSECS_PER_SEC; + ts = &tsh; + } + + print_counters(ts, argc, argv); + return 0; +} + +static +int process_stat_config_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct perf_stat *st = container_of(tool, struct perf_stat, tool); + + perf_event__read_stat_config(&stat_config, &event->stat_config); + + if (cpu_map__empty(st->cpus)) { + if (st->aggr_mode != AGGR_UNSET) + pr_warning("warning: processing task data, aggregation mode not set\n"); + return 0; + } + + if (st->aggr_mode != AGGR_UNSET) + stat_config.aggr_mode = st->aggr_mode; + + if (perf_stat.file.is_pipe) + perf_stat_init_aggr_mode(); + else + perf_stat_init_aggr_mode_file(st); + + return 0; +} + +static int set_maps(struct perf_stat *st) +{ + if (!st->cpus || !st->threads) + return 0; + + if (WARN_ONCE(st->maps_allocated, "stats double allocation\n")) + return -EINVAL; + + perf_evlist__set_maps(evsel_list, st->cpus, st->threads); + + if (perf_evlist__alloc_stats(evsel_list, true)) + return -ENOMEM; + + st->maps_allocated = true; + return 0; +} + +static +int process_thread_map_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct perf_stat *st = container_of(tool, struct perf_stat, tool); + + if (st->threads) { + pr_warning("Extra thread map event, ignoring.\n"); + return 0; + } + + st->threads = thread_map__new_event(&event->thread_map); + if (!st->threads) + return -ENOMEM; + + return set_maps(st); +} + +static +int process_cpu_map_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + struct perf_stat *st = container_of(tool, struct perf_stat, tool); + struct cpu_map *cpus; + + if (st->cpus) { + pr_warning("Extra cpu map event, ignoring.\n"); + return 0; + } + + cpus = cpu_map__new_data(&event->cpu_map.data); + if (!cpus) + return -ENOMEM; + + st->cpus = cpus; + return set_maps(st); +} + +static const char * const stat_report_usage[] = { + "perf stat report [<options>]", + NULL, +}; + +static struct perf_stat perf_stat = { + .tool = { + .attr = perf_event__process_attr, + .event_update = perf_event__process_event_update, + .thread_map = process_thread_map_event, + .cpu_map = process_cpu_map_event, + .stat_config = process_stat_config_event, + .stat = perf_event__process_stat_event, + .stat_round = process_stat_round_event, + }, + .aggr_mode = AGGR_UNSET, +}; + +static int __cmd_report(int argc, const char **argv) { - bool append_file = false; - int output_fd = 0; - const char *output_name = NULL; + struct perf_session *session; const struct option options[] = { - OPT_BOOLEAN('T', "transaction", &transaction_run, - "hardware transaction statistics"), - OPT_CALLBACK('e', "event", &evsel_list, "event", - "event selector. use 'perf list' to list available events", - parse_events_option), - OPT_CALLBACK(0, "filter", &evsel_list, "filter", - "event filter", parse_filter), - OPT_BOOLEAN('i', "no-inherit", &no_inherit, - "child tasks do not inherit counters"), - OPT_STRING('p', "pid", &target.pid, "pid", - "stat events on existing process id"), - OPT_STRING('t', "tid", &target.tid, "tid", - "stat events on existing thread id"), - OPT_BOOLEAN('a', "all-cpus", &target.system_wide, - "system-wide collection from all CPUs"), - OPT_BOOLEAN('g', "group", &group, - "put the counters into a counter group"), - OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), - OPT_INCR('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), - OPT_INTEGER('r', "repeat", &run_count, - "repeat command and print average + stddev (max: 100, forever: 0)"), - OPT_BOOLEAN('n', "null", &null_run, - "null run - dont start any counters"), - OPT_INCR('d', "detailed", &detailed_run, - "detailed run - start a lot of events"), - OPT_BOOLEAN('S', "sync", &sync_run, - "call sync() before starting a run"), - OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL, - "print large numbers with thousands\' separators", - stat__set_big_num), - OPT_STRING('C', "cpu", &target.cpu_list, "cpu", - "list of cpus to monitor in system-wide"), - OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, - "disable CPU count aggregation", AGGR_NONE), - OPT_STRING('x', "field-separator", &csv_sep, "separator", - "print counts with custom separator"), - OPT_CALLBACK('G', "cgroup", &evsel_list, "name", - "monitor event in cgroup name only", parse_cgroups), - OPT_STRING('o', "output", &output_name, "file", "output file name"), - OPT_BOOLEAN(0, "append", &append_file, "append to the output file"), - OPT_INTEGER(0, "log-fd", &output_fd, - "log output to fd, instead of stderr"), - OPT_STRING(0, "pre", &pre_cmd, "command", - "command to run prior to the measured command"), - OPT_STRING(0, "post", &post_cmd, "command", - "command to run after to the measured command"), - OPT_UINTEGER('I', "interval-print", &stat_config.interval, - "print counts at regular interval in ms (>= 10)"), - OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, + OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, "aggregate counts per processor socket", AGGR_SOCKET), - OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, + OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), - OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, - "aggregate counts per thread", AGGR_THREAD), - OPT_UINTEGER('D', "delay", &initial_delay, - "ms to wait before starting measurement after program start"), + OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, + "disable CPU count aggregation", AGGR_NONE), OPT_END() }; + struct stat st; + int ret; + + argc = parse_options(argc, argv, options, stat_report_usage, 0); + + if (!input_name || !strlen(input_name)) { + if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode)) + input_name = "-"; + else + input_name = "perf.data"; + } + + perf_stat.file.path = input_name; + perf_stat.file.mode = PERF_DATA_MODE_READ; + + session = perf_session__new(&perf_stat.file, false, &perf_stat.tool); + if (session == NULL) + return -1; + + perf_stat.session = session; + stat_config.output = stderr; + evsel_list = session->evlist; + + ret = perf_session__process_events(session); + if (ret) + return ret; + + perf_session__delete(session); + return 0; +} + +int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) +{ const char * const stat_usage[] = { "perf stat [<options>] [<command>]", NULL @@ -1264,6 +2164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) const char *mode; FILE *output = stderr; unsigned int interval; + const char * const stat_subcommands[] = { "record", "report" }; setlocale(LC_ALL, ""); @@ -1271,24 +2172,54 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (evsel_list == NULL) return -ENOMEM; - argc = parse_options(argc, argv, options, stat_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + parse_events__shrink_config_terms(); + argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands, + (const char **) stat_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + perf_stat__init_shadow_stats(); + + if (csv_sep) { + csv_output = true; + if (!strcmp(csv_sep, "\\t")) + csv_sep = "\t"; + } else + csv_sep = DEFAULT_SEPARATOR; + + if (argc && !strncmp(argv[0], "rec", 3)) { + argc = __cmd_record(argc, argv); + if (argc < 0) + return -1; + } else if (argc && !strncmp(argv[0], "rep", 3)) + return __cmd_report(argc, argv); interval = stat_config.interval; - if (output_name && strcmp(output_name, "-")) + /* + * For record command the -o is already taken care of. + */ + if (!STAT_RECORD && output_name && strcmp(output_name, "-")) output = NULL; if (output_name && output_fd) { fprintf(stderr, "cannot use both --output and --log-fd\n"); - parse_options_usage(stat_usage, options, "o", 1); - parse_options_usage(NULL, options, "log-fd", 0); + parse_options_usage(stat_usage, stat_options, "o", 1); + parse_options_usage(NULL, stat_options, "log-fd", 0); + goto out; + } + + if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { + fprintf(stderr, "--metric-only is not supported with --per-thread\n"); + goto out; + } + + if (metric_only && run_count > 1) { + fprintf(stderr, "--metric-only is not supported with -r\n"); goto out; } if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); - parse_options_usage(stat_usage, options, "log-fd", 0); + parse_options_usage(stat_usage, stat_options, "log-fd", 0); goto out; } @@ -1314,13 +2245,6 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) stat_config.output = output; - if (csv_sep) { - csv_output = true; - if (!strcmp(csv_sep, "\\t")) - csv_sep = "\t"; - } else - csv_sep = DEFAULT_SEPARATOR; - /* * let the spreadsheet do the pretty-printing */ @@ -1328,8 +2252,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) /* User explicitly passed -B? */ if (big_num_opt == 1) { fprintf(stderr, "-B option not supported with -x\n"); - parse_options_usage(stat_usage, options, "B", 1); - parse_options_usage(NULL, options, "x", 1); + parse_options_usage(stat_usage, stat_options, "B", 1); + parse_options_usage(NULL, stat_options, "x", 1); goto out; } else /* Nope, so disable big number formatting */ big_num = false; @@ -1337,11 +2261,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) big_num = false; if (!argc && target__none(&target)) - usage_with_options(stat_usage, options); + usage_with_options(stat_usage, stat_options); if (run_count < 0) { pr_err("Run count must be a positive number\n"); - parse_options_usage(stat_usage, options, "r", 1); + parse_options_usage(stat_usage, stat_options, "r", 1); goto out; } else if (run_count == 0) { forever = true; @@ -1351,8 +2275,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { fprintf(stderr, "The --per-thread option is only available " "when monitoring via -p -t options.\n"); - parse_options_usage(NULL, options, "p", 1); - parse_options_usage(NULL, options, "t", 1); + parse_options_usage(NULL, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); goto out; } @@ -1366,9 +2290,9 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); - parse_options_usage(stat_usage, options, "G", 1); - parse_options_usage(NULL, options, "A", 1); - parse_options_usage(NULL, options, "a", 1); + parse_options_usage(stat_usage, stat_options, "G", 1); + parse_options_usage(NULL, stat_options, "A", 1); + parse_options_usage(NULL, stat_options, "a", 1); goto out; } @@ -1380,12 +2304,12 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (perf_evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); - parse_options_usage(stat_usage, options, "p", 1); - parse_options_usage(NULL, options, "t", 1); + parse_options_usage(stat_usage, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); } else if (target__has_cpu(&target)) { perror("failed to parse CPUs map"); - parse_options_usage(stat_usage, options, "C", 1); - parse_options_usage(NULL, options, "a", 1); + parse_options_usage(stat_usage, stat_options, "C", 1); + parse_options_usage(NULL, stat_options, "a", 1); } goto out; } @@ -1400,7 +2324,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (interval && interval < 100) { if (interval < 10) { pr_err("print interval must be >= 10ms\n"); - parse_options_usage(stat_usage, options, "I", 1); + parse_options_usage(stat_usage, stat_options, "I", 1); goto out; } else pr_warning("print interval < 100ms. " @@ -1443,6 +2367,42 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (!forever && status != -1 && !interval) print_counters(NULL, argc, argv); + if (STAT_RECORD) { + /* + * We synthesize the kernel mmap record just so that older tools + * don't emit warnings about not being able to resolve symbols + * due to /proc/sys/kernel/kptr_restrict settings and instear provide + * a saner message about no samples being in the perf.data file. + * + * This also serves to suppress a warning about f_header.data.size == 0 + * in header.c at the moment 'perf stat record' gets introduced, which + * is not really needed once we start adding the stat specific PERF_RECORD_ + * records, but the need to suppress the kptr_restrict messages in older + * tools remain -acme + */ + int fd = perf_data_file__fd(&perf_stat.file); + int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat, + process_synthesized_event, + &perf_stat.session->machines.host); + if (err) { + pr_warning("Couldn't synthesize the kernel mmap record, harmless, " + "older tools may produce warnings about this file\n."); + } + + if (!interval) { + if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL)) + pr_err("failed to write stat round event\n"); + } + + if (!perf_stat.file.is_pipe) { + perf_stat.session->header.data_size += perf_stat.bytes_written; + perf_session__write_header(perf_stat.session, evsel_list, fd, true); + } + + perf_session__delete(perf_stat.session); + } + + perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: perf_evlist__delete(evsel_list); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 30e59620179d..bd7a7757176f 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -30,7 +30,7 @@ #include "perf.h" #include "util/header.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/event.h" #include "util/session.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 7e2e72e6d9d1..94af190f6843 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -34,7 +34,7 @@ #include "util/top.h" #include "util/util.h" #include <linux/rbtree.h> -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-events.h" #include "util/cpumap.h" #include "util/xyarray.h" @@ -175,42 +175,40 @@ static void perf_top__record_precise_ip(struct perf_top *top, int counter, u64 ip) { struct annotation *notes; - struct symbol *sym; + struct symbol *sym = he->ms.sym; int err = 0; - if (he == NULL || he->ms.sym == NULL || - ((top->sym_filter_entry == NULL || - top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1)) + if (sym == NULL || (use_browser == 0 && + (top->sym_filter_entry == NULL || + top->sym_filter_entry->ms.sym != sym))) return; - sym = he->ms.sym; notes = symbol__annotation(sym); if (pthread_mutex_trylock(¬es->lock)) return; - ip = he->ms.map->map_ip(he->ms.map, ip); - - if (ui__has_annotation()) - err = hist_entry__inc_addr_samples(he, counter, ip); + err = hist_entry__inc_addr_samples(he, counter, ip); pthread_mutex_unlock(¬es->lock); - /* - * This function is now called with he->hists->lock held. - * Release it before going to sleep. - */ - pthread_mutex_unlock(&he->hists->lock); + if (unlikely(err)) { + /* + * This function is now called with he->hists->lock held. + * Release it before going to sleep. + */ + pthread_mutex_unlock(&he->hists->lock); + + if (err == -ERANGE && !he->ms.map->erange_warned) + ui__warn_map_erange(he->ms.map, sym, ip); + else if (err == -ENOMEM) { + pr_err("Not enough memory for annotating '%s' symbol!\n", + sym->name); + sleep(1); + } - if (err == -ERANGE && !he->ms.map->erange_warned) - ui__warn_map_erange(he->ms.map, sym, ip); - else if (err == -ENOMEM) { - pr_err("Not enough memory for annotating '%s' symbol!\n", - sym->name); - sleep(1); + pthread_mutex_lock(&he->hists->lock); } - - pthread_mutex_lock(&he->hists->lock); } static void perf_top__show_details(struct perf_top *top) @@ -254,7 +252,8 @@ static void perf_top__print_sym_table(struct perf_top *top) char bf[160]; int printed = 0; const int win_width = top->winsize.ws_col - 1; - struct hists *hists = evsel__hists(top->sym_evsel); + struct perf_evsel *evsel = top->sym_evsel; + struct hists *hists = evsel__hists(evsel); puts(CONSOLE_CLEAR); @@ -290,7 +289,7 @@ static void perf_top__print_sym_table(struct perf_top *top) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); @@ -542,6 +541,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) static void perf_top__sort_new_samples(void *arg) { struct perf_top *t = arg; + struct perf_evsel *evsel = t->sym_evsel; struct hists *hists; perf_top__reset_sample_counters(t); @@ -549,7 +549,7 @@ static void perf_top__sort_new_samples(void *arg) if (t->evlist->selected != NULL) t->sym_evsel = t->evlist->selected; - hists = evsel__hists(t->sym_evsel); + hists = evsel__hists(evsel); if (t->evlist->enabled) { if (t->zero) { @@ -561,7 +561,7 @@ static void perf_top__sort_new_samples(void *arg) } hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); } static void *display_thread_tui(void *arg) @@ -687,14 +687,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct perf_evsel *evsel = iter->evsel; - if (sort__has_sym && single) { - u64 ip = al->addr; - - if (al->map) - ip = al->map->unmap_ip(al->map, ip); - - perf_top__record_precise_ip(top, he, evsel->idx, ip); - } + if (sort__has_sym && single) + perf_top__record_precise_ip(top, he, evsel->idx, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); @@ -964,7 +958,7 @@ static int __cmd_top(struct perf_top *top) if (ret) goto out_delete; - if (perf_session__register_idle_thread(top->session) == NULL) + if (perf_session__register_idle_thread(top->session) < 0) goto out_delete; machine__synthesize_threads(&top->session->machines.host, &opts->target, @@ -1071,7 +1065,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset) return parse_callchain_top_opt(arg); } -static int perf_top_config(const char *var, const char *value, void *cb) +static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused) { if (!strcmp(var, "top.call-graph")) var = "call-graph.record-mode"; /* fall-through */ @@ -1080,7 +1074,7 @@ static int perf_top_config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } static int @@ -1218,6 +1212,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, "branch filter mask", "branch stack filter modes", parse_branch_stack), + OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace, + "Show raw trace event output (do not use print fmt or plugins)"), + OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, + "Show entries in a hierarchy"), OPT_END() }; const char * const top_usage[] = { @@ -1239,11 +1237,37 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (argc) usage_with_options(top_usage, options); + if (!top.evlist->nr_entries && + perf_evlist__add_default(top.evlist) < 0) { + pr_err("Not enough memory for event selector list\n"); + goto out_delete_evlist; + } + + if (symbol_conf.report_hierarchy) { + /* disable incompatible options */ + symbol_conf.event_group = false; + symbol_conf.cumulate_callchain = false; + + if (field_order) { + pr_err("Error: --hierarchy and --fields options cannot be used together\n"); + parse_options_usage(top_usage, options, "fields", 0); + parse_options_usage(NULL, options, "hierarchy", 0); + goto out_delete_evlist; + } + } + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ sort__need_collapse = 1; - if (setup_sorting() < 0) { + if (top.use_stdio) + use_browser = 0; + else if (top.use_tui) + use_browser = 1; + + setup_browser(false); + + if (setup_sorting(top.evlist) < 0) { if (sort_order) parse_options_usage(top_usage, options, "s", 1); if (field_order) @@ -1252,13 +1276,6 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) goto out_delete_evlist; } - if (top.use_stdio) - use_browser = 0; - else if (top.use_tui) - use_browser = 1; - - setup_browser(false); - status = target__validate(target); if (status) { target__strerror(target, status, errbuf, BUFSIZ); @@ -1279,12 +1296,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) if (target__none(target)) target->system_wide = true; - if (perf_evlist__create_maps(top.evlist, target) < 0) - usage_with_options(top_usage, options); - - if (!top.evlist->nr_entries && - perf_evlist__add_default(top.evlist) < 0) { - ui__error("Not enough memory for event selector list\n"); + if (perf_evlist__create_maps(top.evlist, target) < 0) { + ui__error("Couldn't create thread/CPU maps: %s\n", + errno == ENOENT ? "No such process" : strerror_r(errno, errbuf, sizeof(errbuf))); goto out_delete_evlist; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index c783d8fd3a80..8dc98c598b1a 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -22,17 +22,18 @@ #include "util/color.h" #include "util/debug.h" #include "util/evlist.h" -#include "util/exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "util/machine.h" #include "util/session.h" #include "util/thread.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/strlist.h" #include "util/intlist.h" #include "util/thread_map.h" #include "util/stat.h" #include "trace-event.h" #include "util/parse-events.h" +#include "util/bpf-loader.h" #include <libaudit.h> #include <stdlib.h> @@ -1724,8 +1725,12 @@ static int trace__read_syscall_info(struct trace *trace, int id) sc->args = sc->tp_format->format.fields; sc->nr_args = sc->tp_format->format.nr_fields; - /* drop nr field - not relevant here; does not exist on older kernels */ - if (sc->args && strcmp(sc->args->name, "nr") == 0) { + /* + * We need to check and discard the first variable '__syscall_nr' + * or 'nr' that mean the syscall number. It is needless here. + * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. + */ + if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) { sc->args = sc->args->next; --sc->nr_args; } @@ -2177,6 +2182,37 @@ out_dump: return 0; } +static void bpf_output__printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + FILE *output = extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_CHAR_DATA: + fprintf(output, "%c", isprint(ch) ? ch : '.'); + break; + case BINARY_PRINT_DATA_BEGIN: + case BINARY_PRINT_LINE_BEGIN: + case BINARY_PRINT_ADDR: + case BINARY_PRINT_NUM_DATA: + case BINARY_PRINT_NUM_PAD: + case BINARY_PRINT_SEP: + case BINARY_PRINT_CHAR_PAD: + case BINARY_PRINT_LINE_END: + case BINARY_PRINT_DATA_END: + default: + break; + } +} + +static void bpf_output__fprintf(struct trace *trace, + struct perf_sample *sample) +{ + print_binary(sample->raw_data, sample->raw_size, 8, + bpf_output__printer, trace->output); +} + static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -2189,7 +2225,9 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, fprintf(trace->output, "%s:", evsel->name); - if (evsel->tp_format) { + if (perf_evsel__is_bpf_output(evsel)) { + bpf_output__fprintf(trace, sample); + } else if (evsel->tp_format) { event_format__fprintf(evsel->tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); @@ -2586,6 +2624,16 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + err = bpf__apply_obj_config(); + if (err) { + char errbuf[BUFSIZ]; + + bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf)); + pr_err("ERROR: Apply config to BPF failed: %s\n", + errbuf); + goto out_error_open; + } + /* * Better not use !target__has_task() here because we need to cover the * case where no threads were specified in the command line, but a diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c new file mode 100644 index 000000000000..9b10cda6b6dc --- /dev/null +++ b/tools/perf/builtin-version.c @@ -0,0 +1,10 @@ +#include "util/util.h" +#include "builtin.h" +#include "perf.h" + +int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused, + const char *prefix __maybe_unused) +{ + printf("perf version %s\n", perf_version_string); + return 0; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 3688ad29085f..3f871b54e261 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -17,6 +17,7 @@ extern int cmd_annotate(int argc, const char **argv, const char *prefix); extern int cmd_bench(int argc, const char **argv, const char *prefix); extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix); extern int cmd_buildid_list(int argc, const char **argv, const char *prefix); +extern int cmd_config(int argc, const char **argv, const char *prefix); extern int cmd_diff(int argc, const char **argv, const char *prefix); extern int cmd_evlist(int argc, const char **argv, const char *prefix); extern int cmd_help(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 00fcaf8a5b8d..ab5cbaa170d0 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -9,6 +9,7 @@ perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common perf-data mainporcelain common perf-diff mainporcelain common +perf-config mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common perf-kmem mainporcelain common @@ -25,4 +26,4 @@ perf-stat mainporcelain common perf-test mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common -perf-trace mainporcelain common +perf-trace mainporcelain audit diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index de89ec574361..eca6a912e8c2 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -17,7 +17,7 @@ detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) -include $(src-perf)/config/Makefile.arch +include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,ARCH) @@ -61,50 +61,45 @@ endif ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 -else - # - # For linking with debug library, run like: - # - # make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ - # - ifdef LIBUNWIND_DIR - LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include - LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib - endif - LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) - - # Set per-feature check compilation flags - FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) - FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) - FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) endif +# +# For linking with debug library, run like: +# +# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/ +# +ifdef LIBUNWIND_DIR + LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include + LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib +endif +LIBUNWIND_LDFLAGS += $(LIBUNWIND_LIBS) + +# Set per-feature check compilation flags +FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) +FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) +FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif -ifndef NO_LIBELF - # for linking with debug library, run like: - # make DEBUG=1 LIBDW_DIR=/opt/libdw/ - ifdef LIBDW_DIR - LIBDW_CFLAGS := -I$(LIBDW_DIR)/include - LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib - endif - FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) - FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw +# for linking with debug library, run like: +# make DEBUG=1 LIBDW_DIR=/opt/libdw/ +ifdef LIBDW_DIR + LIBDW_CFLAGS := -I$(LIBDW_DIR)/include + LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw -ifdef LIBBABELTRACE - # for linking with debug library, run like: - # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ - ifdef LIBBABELTRACE_DIR - LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include - LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib - endif - FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) - FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +# for linking with debug library, run like: +# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ +ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib endif +FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) +FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi # include ARCH specific config @@ -135,8 +130,6 @@ endif ifeq ($(DEBUG),0) CFLAGS += -O6 -else - CFLAGS += $(call cc-option,-Og,-O0) endif ifdef PARSER_DEBUG @@ -147,28 +140,26 @@ ifdef PARSER_DEBUG $(call detected_var,PARSER_DEBUG_FLEX) endif -ifndef NO_LIBPYTHON - # Try different combinations to accommodate systems that only have - # python[2][-config] in weird combinations but always preferring - # python2 and python2-config as per pep-0394. If we catch a - # python[-config] in version 3, the version check will kill it. - PYTHON2 := $(if $(call get-executable,python2),python2,python) - override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) - PYTHON2_CONFIG := \ - $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) - override PYTHON_CONFIG := \ - $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) +# Try different combinations to accommodate systems that only have +# python[2][-config] in weird combinations but always preferring +# python2 and python2-config as per pep-0394. If we catch a +# python[-config] in version 3, the version check will kill it. +PYTHON2 := $(if $(call get-executable,python2),python2,python) +override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2)) +PYTHON2_CONFIG := \ + $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config) +override PYTHON_CONFIG := \ + $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG)) - PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) +PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) - PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) +PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) +PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) - FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) - FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) -endif +FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) +FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) +FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 @@ -183,7 +174,11 @@ LDFLAGS += -Wl,-z,noexecstack EXTLIBS = -lpthread -lrt -lm -ldl +ifeq ($(FEATURES_DUMP),) include $(srctree)/tools/build/Makefile.feature +else +include $(FEATURES_DUMP) +endif ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all @@ -318,9 +313,28 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_LIBBPF_SUPPORT $(call detected,CONFIG_LIBBPF) endif + + ifndef NO_DWARF + ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET + CFLAGS += -DHAVE_BPF_PROLOGUE + $(call detected,CONFIG_BPF_PROLOGUE) + else + msg := $(warning BPF prologue is not supported by architecture $(ARCH), missing regs_query_register_offset()); + endif + else + msg := $(warning DWARF support is off, BPF prologue is disabled); + endif + endif # NO_LIBBPF endif # NO_LIBELF +ifdef PERF_HAVE_JITDUMP + ifndef NO_DWARF + $(call detected,CONFIG_JITDUMP) + CFLAGS += -DHAVE_JITDUMP + endif +endif + ifeq ($(ARCH),powerpc) ifndef NO_DWARF CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX @@ -397,6 +411,17 @@ ifndef NO_LIBAUDIT endif endif +ifndef NO_LIBCRYPTO + ifneq ($(feature-libcrypto), 1) + msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev); + NO_LIBCRYPTO := 1 + else + CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT + EXTLIBS += -lcrypto + $(call detected,CONFIG_CRYPTO) + endif +endif + ifdef NO_NEWT NO_SLANG=1 endif @@ -483,7 +508,7 @@ else PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) - PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) + PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) @@ -681,6 +706,8 @@ sharedir = $(prefix)/share template_dir = share/perf-core/templates STRACE_GROUPS_DIR = share/perf-core/strace/groups htmldir = share/doc/perf-doc +tipdir = share/doc/perf-tip +srcdir = $(srctree)/tools/perf ifeq ($(prefix),/usr) sysconfdir = /etc ETC_PERFCONFIG = $(sysconfdir)/perfconfig @@ -707,19 +734,24 @@ infodir_SQ = $(subst ','\'',$(infodir)) perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) +tipdir_SQ = $(subst ','\'',$(tipdir)) prefix_SQ = $(subst ','\'',$(prefix)) sysconfdir_SQ = $(subst ','\'',$(sysconfdir)) libdir_SQ = $(subst ','\'',$(libdir)) +srcdir_SQ = $(subst ','\'',$(srcdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) +tip_instdir = $(tipdir) else perfexec_instdir = $(prefix)/$(perfexecdir) STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) +tip_instdir = $(prefix)/$(tipdir) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) +tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) # If we install to $(HOME) we keep the traceevent default: # $(HOME)/.traceevent/plugins @@ -741,6 +773,10 @@ ifeq ($(VF),1) $(call print_var,sysconfdir) $(call print_var,LIBUNWIND_DIR) $(call print_var,LIBDW_DIR) + + ifeq ($(dwarf-post-unwind),1) + $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) + endif $(info ) endif @@ -756,6 +792,8 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) +$(call detected_var,tipdir_SQ) +$(call detected_var,srcdir_SQ) $(call detected_var,LIBDIR) $(call detected_var,GTK_CFLAGS) $(call detected_var,PERL_EMBED_CCOPTS) diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch deleted file mode 100644 index e11fbd6fae78..000000000000 --- a/tools/perf/config/Makefile.arch +++ /dev/null @@ -1,18 +0,0 @@ -ifndef ARCH -ARCH := $(shell uname -m 2>/dev/null || echo not) -endif - -ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ - -e s/sun4u/sparc/ -e s/sparc64/sparc/ \ - -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ - -e s/s390x/s390/ -e s/parisc64/parisc/ \ - -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ - -e s/tile.*/tile/ ) - -LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) -ifeq ($(LP64), 1) - IS_64_BIT := 1 -else - IS_64_BIT := 0 -endif diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 0ebef09c0842..c16ce833079c 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -177,22 +177,3 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef _ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2))) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) - -# try-run -# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) -# Exit code chooses option. "$$TMP" is can be used as temporary file and -# is automatically cleaned up. -try-run = $(shell set -e; \ - TMP="$(TMPOUT).$$$$.tmp"; \ - TMPO="$(TMPOUT).$$$$.o"; \ - if ($(1)) >/dev/null 2>&1; \ - then echo "$(2)"; \ - else echo "$(3)"; \ - fi; \ - rm -f "$$TMP" "$$TMPO") - -# cc-option -# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) - -cc-option = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) diff --git a/tools/perf/jvmti/Makefile b/tools/perf/jvmti/Makefile new file mode 100644 index 000000000000..5ce61a1bda9c --- /dev/null +++ b/tools/perf/jvmti/Makefile @@ -0,0 +1,89 @@ +ARCH=$(shell uname -m) + +ifeq ($(ARCH), x86_64) +JARCH=amd64 +endif +ifeq ($(ARCH), armv7l) +JARCH=armhf +endif +ifeq ($(ARCH), armv6l) +JARCH=armhf +endif +ifeq ($(ARCH), aarch64) +JARCH=aarch64 +endif +ifeq ($(ARCH), ppc64) +JARCH=powerpc +endif +ifeq ($(ARCH), ppc64le) +JARCH=powerpc +endif + +DESTDIR=/usr/local + +VERSION=1 +REVISION=0 +AGE=0 + +LN=ln -sf +RM=rm + +SLIBJVMTI=libjvmti.so.$(VERSION).$(REVISION).$(AGE) +VLIBJVMTI=libjvmti.so.$(VERSION) +SLDFLAGS=-shared -Wl,-soname -Wl,$(VLIBJVMTI) +SOLIBEXT=so + +# The following works at least on fedora 23, you may need the next +# line for other distros. +ifneq (,$(wildcard /usr/sbin/update-java-alternatives)) +JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | cut -d ' ' -f 3) +else + ifneq (,$(wildcard /usr/sbin/alternatives)) + JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + endif +endif +ifndef JDIR +$(error Could not find alternatives command, you need to set JDIR= to point to the root of your Java directory) +else + ifeq (,$(wildcard $(JDIR)/include/jvmti.h)) + $(error the openjdk development package appears to me missing, install and try again) + endif +endif +$(info Using Java from $(JDIR)) +# -lrt required in 32-bit mode for clock_gettime() +LIBS=-lelf -lrt +INCDIR=-I $(JDIR)/include -I $(JDIR)/include/linux + +TARGETS=$(SLIBJVMTI) + +SRCS=libjvmti.c jvmti_agent.c +OBJS=$(SRCS:.c=.o) +SOBJS=$(OBJS:.o=.lo) +OPT=-O2 -g -Werror -Wall + +CFLAGS=$(INCDIR) $(OPT) + +all: $(TARGETS) + +.c.o: + $(CC) $(CFLAGS) -c $*.c +.c.lo: + $(CC) -fPIC -DPIC $(CFLAGS) -c $*.c -o $*.lo + +$(OBJS) $(SOBJS): Makefile jvmti_agent.h ../util/jitdump.h + +$(SLIBJVMTI): $(SOBJS) + $(CC) $(CFLAGS) $(SLDFLAGS) -o $@ $(SOBJS) $(LIBS) + $(LN) $@ libjvmti.$(SOLIBEXT) + +clean: + $(RM) -f *.o *.so.* *.so *.lo + +install: + -mkdir -p $(DESTDIR)/lib + install -m 755 $(SLIBJVMTI) $(DESTDIR)/lib/ + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) $(VLIBJVMTI)) + (cd $(DESTDIR)/lib; $(LN) $(SLIBJVMTI) libjvmti.$(SOLIBEXT)) + ldconfig + +.SUFFIXES: .c .S .o .lo diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c new file mode 100644 index 000000000000..6461e02ab940 --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.c @@ -0,0 +1,465 @@ +/* + * jvmti_agent.c: JVMTI agent interface + * + * Adapted from the Oprofile code in opagent.c: + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#include <sys/types.h> +#include <sys/stat.h> /* for mkdir() */ +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <limits.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <sys/mman.h> +#include <syscall.h> /* for gettid() */ +#include <err.h> + +#include "jvmti_agent.h" +#include "../util/jitdump.h" + +#define JIT_LANG "java" + +static char jit_path[PATH_MAX]; +static void *marker_addr; + +/* + * padding buffer + */ +static const char pad_bytes[7]; + +static inline pid_t gettid(void) +{ + return (pid_t)syscall(__NR_gettid); +} + +static int get_e_machine(struct jitheader *hdr) +{ + ssize_t sret; + char id[16]; + int fd, ret = -1; + int m = -1; + struct { + uint16_t e_type; + uint16_t e_machine; + } info; + + fd = open("/proc/self/exe", O_RDONLY); + if (fd == -1) + return -1; + + sret = read(fd, id, sizeof(id)); + if (sret != sizeof(id)) + goto error; + + /* check ELF signature */ + if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') + goto error; + + sret = read(fd, &info, sizeof(info)); + if (sret != sizeof(info)) + goto error; + + m = info.e_machine; + if (m < 0) + m = 0; /* ELF EM_NONE */ + + hdr->elf_mach = m; + ret = 0; +error: + close(fd); + return ret; +} + +#define NSEC_PER_SEC 1000000000 +static int perf_clk_id = CLOCK_MONOTONIC; + +static inline uint64_t +timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +static inline uint64_t +perf_get_timestamp(void) +{ + struct timespec ts; + int ret; + + ret = clock_gettime(perf_clk_id, &ts); + if (ret) + return 0; + + return timespec_to_ns(&ts); +} + +static int +debug_cache_init(void) +{ + char str[32]; + char *base, *p; + struct tm tm; + time_t t; + int ret; + + time(&t); + localtime_r(&t, &tm); + + base = getenv("JITDUMPDIR"); + if (!base) + base = getenv("HOME"); + if (!base) + base = "."; + + strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); + + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("jvmti: cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = mkdir(jit_path, 0755); + if (ret == -1) { + if (errno != EEXIST) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + } + + snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); + + p = mkdtemp(jit_path); + if (p != jit_path) { + warn("cannot create jit cache dir %s", jit_path); + return -1; + } + + return 0; +} + +static int +perf_open_marker_file(int fd) +{ + long pgsz; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return -1; + + /* + * we mmap the jitdump to create an MMAP RECORD in perf.data file. + * The mmap is captured either live (perf record running when we mmap) + * or in deferred mode, via /proc/PID/maps + * the MMAP record is used as a marker of a jitdump file for more meta + * data info about the jitted code. Perf report/annotate detect this + * special filename and process the jitdump file. + * + * mapping must be PROT_EXEC to ensure it is captured by perf record + * even when not using -d option + */ + marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0); + return (marker_addr == MAP_FAILED) ? -1 : 0; +} + +static void +perf_close_marker_file(void) +{ + long pgsz; + + if (!marker_addr) + return; + + pgsz = sysconf(_SC_PAGESIZE); + if (pgsz == -1) + return; + + munmap(marker_addr, pgsz); +} + +void *jvmti_open(void) +{ + int pad_cnt; + char dump_path[PATH_MAX]; + struct jitheader header; + int fd; + FILE *fp; + + /* + * check if clockid is supported + */ + if (!perf_get_timestamp()) + warnx("jvmti: kernel does not support %d clock id", perf_clk_id); + + memset(&header, 0, sizeof(header)); + + debug_cache_init(); + + /* + * jitdump file name + */ + snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + + fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + return NULL; + + /* + * create perf.data maker for the jitdump file + */ + if (perf_open_marker_file(fd)) { + warnx("jvmti: failed to create marker file"); + return NULL; + } + + fp = fdopen(fd, "w+"); + if (!fp) { + warn("jvmti: cannot create %s", dump_path); + close(fd); + goto error; + } + + warnx("jvmti: jitdump in %s", dump_path); + + if (get_e_machine(&header)) { + warn("get_e_machine failed\n"); + goto error; + } + + header.magic = JITHEADER_MAGIC; + header.version = JITHEADER_VERSION; + header.total_size = sizeof(header); + header.pid = getpid(); + + /* calculate amount of padding '\0' */ + pad_cnt = PADDING_8ALIGNED(header.total_size); + header.total_size += pad_cnt; + + header.timestamp = perf_get_timestamp(); + + if (!fwrite(&header, sizeof(header), 1, fp)) { + warn("jvmti: cannot write dumpfile header"); + goto error; + } + + /* write padding '\0' if necessary */ + if (pad_cnt && !fwrite(pad_bytes, pad_cnt, 1, fp)) { + warn("jvmti: cannot write dumpfile header padding"); + goto error; + } + + return fp; +error: + fclose(fp); + return NULL; +} + +int +jvmti_close(void *agent) +{ + struct jr_code_close rec; + FILE *fp = agent; + + if (!fp) { + warnx("jvmti: incalid fd in close_agent"); + return -1; + } + + rec.p.id = JIT_CODE_CLOSE; + rec.p.total_size = sizeof(rec); + + rec.p.timestamp = perf_get_timestamp(); + + if (!fwrite(&rec, sizeof(rec), 1, fp)) + return -1; + + fclose(fp); + + fp = NULL; + + perf_close_marker_file(); + + return 0; +} + +int +jvmti_write_code(void *agent, char const *sym, + uint64_t vma, void const *code, unsigned int const size) +{ + static int code_generation = 1; + struct jr_code_load rec; + size_t sym_len; + size_t padding_count; + FILE *fp = agent; + int ret = -1; + + /* don't care about 0 length function, no samples */ + if (size == 0) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_native_code"); + return -1; + } + + sym_len = strlen(sym) + 1; + + rec.p.id = JIT_CODE_LOAD; + rec.p.total_size = sizeof(rec) + sym_len; + padding_count = PADDING_8ALIGNED(rec.p.total_size); + rec.p. total_size += padding_count; + rec.p.timestamp = perf_get_timestamp(); + + rec.code_size = size; + rec.vma = vma; + rec.code_addr = vma; + rec.pid = getpid(); + rec.tid = gettid(); + + if (code) + rec.p.total_size += size; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + /* + * get code index inside lock to avoid race condition + */ + rec.code_index = code_generation++; + + ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + fwrite_unlocked(sym, sym_len, 1, fp); + + if (padding_count) + fwrite_unlocked(pad_bytes, padding_count, 1, fp); + + if (code) + fwrite_unlocked(code, size, 1, fp); + + funlockfile(fp); + + ret = 0; + + return ret; +} + +int +jvmti_write_debug_info(void *agent, uint64_t code, const char *file, + jvmti_line_info_t *li, int nr_lines) +{ + struct jr_code_debug_info rec; + size_t sret, len, size, flen; + size_t padding_count; + uint64_t addr; + const char *fn = file; + FILE *fp = agent; + int i; + + /* + * no entry to write + */ + if (!nr_lines) + return 0; + + if (!fp) { + warnx("jvmti: invalid fd in write_debug_info"); + return -1; + } + + flen = strlen(file) + 1; + + rec.p.id = JIT_CODE_DEBUG_INFO; + size = sizeof(rec); + rec.p.timestamp = perf_get_timestamp(); + rec.code_addr = (uint64_t)(uintptr_t)code; + rec.nr_entry = nr_lines; + + /* + * on disk source line info layout: + * uint64_t : addr + * int : line number + * int : column discriminator + * file[] : source file name + * padding : pad to multiple of 8 bytes + */ + size += nr_lines * sizeof(struct debug_entry); + size += flen * nr_lines; + /* + * pad to 8 bytes + */ + padding_count = PADDING_8ALIGNED(size); + + rec.p.total_size = size + padding_count; + + /* + * If JVM is multi-threaded, nultiple concurrent calls to agent + * may be possible, so protect file writes + */ + flockfile(fp); + + sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp); + if (sret != 1) + goto error; + + for (i = 0; i < nr_lines; i++) { + + addr = (uint64_t)li[i].pc; + len = sizeof(addr); + sret = fwrite_unlocked(&addr, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(li[0].line_number); + sret = fwrite_unlocked(&li[i].line_number, len, 1, fp); + if (sret != 1) + goto error; + + len = sizeof(li[0].discrim); + sret = fwrite_unlocked(&li[i].discrim, len, 1, fp); + if (sret != 1) + goto error; + + sret = fwrite_unlocked(fn, flen, 1, fp); + if (sret != 1) + goto error; + } + if (padding_count) + sret = fwrite_unlocked(pad_bytes, padding_count, 1, fp); + if (sret != 1) + goto error; + + funlockfile(fp); + return 0; +error: + funlockfile(fp); + return -1; +} diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h new file mode 100644 index 000000000000..bedf5d0ba9ff --- /dev/null +++ b/tools/perf/jvmti/jvmti_agent.h @@ -0,0 +1,36 @@ +#ifndef __JVMTI_AGENT_H__ +#define __JVMTI_AGENT_H__ + +#include <sys/types.h> +#include <stdint.h> +#include <jvmti.h> + +#define __unused __attribute__((unused)) + +#if defined(__cplusplus) +extern "C" { +#endif + +typedef struct { + unsigned long pc; + int line_number; + int discrim; /* discriminator -- 0 for now */ +} jvmti_line_info_t; + +void *jvmti_open(void); +int jvmti_close(void *agent); +int jvmti_write_code(void *agent, char const *symbol_name, + uint64_t vma, void const *code, + const unsigned int code_size); + +int jvmti_write_debug_info(void *agent, + uint64_t code, + const char *file, + jvmti_line_info_t *li, + int nr_lines); + +#if defined(__cplusplus) +} + +#endif +#endif /* __JVMTI_H__ */ diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c new file mode 100644 index 000000000000..ac12e4b91a92 --- /dev/null +++ b/tools/perf/jvmti/libjvmti.c @@ -0,0 +1,304 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <err.h> +#include <jvmti.h> +#include <jvmticmlr.h> +#include <limits.h> + +#include "jvmti_agent.h" + +static int has_line_numbers; +void *jvmti_agent; + +static jvmtiError +do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci, + jvmti_line_info_t *tab, jint *nr) +{ + jint i, lines = 0; + jint nr_lines = 0; + jvmtiLineNumberEntry *loc_tab = NULL; + jvmtiError ret; + + ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab); + if (ret != JVMTI_ERROR_NONE) + return ret; + + for (i = 0; i < nr_lines; i++) { + if (loc_tab[i].start_location < bci) { + tab[lines].pc = (unsigned long)pc; + tab[lines].line_number = loc_tab[i].line_number; + tab[lines].discrim = 0; /* not yet used */ + lines++; + } else { + break; + } + } + (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab); + *nr = lines; + return JVMTI_ERROR_NONE; +} + +static jvmtiError +get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines) +{ + const jvmtiCompiledMethodLoadRecordHeader *hdr; + jvmtiCompiledMethodLoadInlineRecord *rec; + jvmtiLineNumberEntry *lne = NULL; + PCStackInfo *c; + jint nr, ret; + int nr_total = 0; + int i, lines_total = 0; + + if (!(tab && nr_lines)) + return JVMTI_ERROR_NULL_POINTER; + + /* + * Phase 1 -- get the number of lines necessary + */ + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + /* + * unfortunately, need a tab to get the number of lines! + */ + ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne); + if (ret == JVMTI_ERROR_NONE) { + /* free what was allocated for nothing */ + (*jvmti)->Deallocate(jvmti, (unsigned char *)lne); + nr_total += (int)nr; + } + } + } + } + + if (nr_total == 0) + return JVMTI_ERROR_NOT_FOUND; + + /* + * Phase 2 -- allocate big enough line table + */ + *tab = malloc(nr_total * sizeof(**tab)); + if (!*tab) + return JVMTI_ERROR_OUT_OF_MEMORY; + + for (hdr = compile_info; hdr != NULL; hdr = hdr->next) { + if (hdr->kind == JVMTI_CMLR_INLINE_INFO) { + rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr; + for (i = 0; i < rec->numpcs; i++) { + c = rec->pcinfo + i; + nr = 0; + ret = do_get_line_numbers(jvmti, c->pc, + c->methods[0], + c->bcis[0], + *tab + lines_total, + &nr); + if (ret == JVMTI_ERROR_NONE) + lines_total += nr; + } + } + } + *nr_lines = lines_total; + return JVMTI_ERROR_NONE; +} + +static void JNICALL +compiled_method_load_cb(jvmtiEnv *jvmti, + jmethodID method, + jint code_size, + void const *code_addr, + jint map_length, + jvmtiAddrLocationMap const *map, + const void *compile_info) +{ + jvmti_line_info_t *line_tab = NULL; + jclass decl_class; + char *class_sign = NULL; + char *func_name = NULL; + char *func_sign = NULL; + char *file_name= NULL; + char fn[PATH_MAX]; + uint64_t addr = (uint64_t)(uintptr_t)code_addr; + jvmtiError ret; + int nr_lines = 0; /* in line_tab[] */ + size_t len; + + ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method, + &decl_class); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get declaring class"); + return; + } + + if (has_line_numbers && map && map_length) { + ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get line table for method"); + nr_lines = 0; + } + } + + ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot get source filename ret=%d", ret); + goto error; + } + + ret = (*jvmti)->GetClassSignature(jvmti, decl_class, + &class_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: getclassignature failed"); + goto error; + } + + ret = (*jvmti)->GetMethodName(jvmti, method, &func_name, + &func_sign, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: failed getmethodname"); + goto error; + } + + /* + * Assume path name is class hierarchy, this is a common practice with Java programs + */ + if (*class_sign == 'L') { + int j, i = 0; + char *p = strrchr(class_sign, '/'); + if (p) { + /* drop the 'L' prefix and copy up to the final '/' */ + for (i = 0; i < (p - class_sign); i++) + fn[i] = class_sign[i+1]; + } + /* + * append file name, we use loops and not string ops to avoid modifying + * class_sign which is used later for the symbol name + */ + for (j = 0; i < (PATH_MAX - 1) && file_name && j < strlen(file_name); j++, i++) + fn[i] = file_name[j]; + fn[i] = '\0'; + } else { + /* fallback case */ + strcpy(fn, file_name); + } + /* + * write source line info record if we have it + */ + if (jvmti_write_debug_info(jvmti_agent, addr, fn, line_tab, nr_lines)) + warnx("jvmti: write_debug_info() failed"); + + len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2; + { + char str[len]; + snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign); + + if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size)) + warnx("jvmti: write_code() failed"); + } +error: + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name); + (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign); + (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name); + free(line_tab); +} + +static void JNICALL +code_generated_cb(jvmtiEnv *jvmti, + char const *name, + void const *code_addr, + jint code_size) +{ + uint64_t addr = (uint64_t)(unsigned long)code_addr; + int ret; + + ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size); + if (ret) + warnx("jvmti: write_code() failed for code_generated"); +} + +JNIEXPORT jint JNICALL +Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __unused) +{ + jvmtiEventCallbacks cb; + jvmtiCapabilities caps1; + jvmtiJlocationFormat format; + jvmtiEnv *jvmti = NULL; + jint ret; + + jvmti_agent = jvmti_open(); + if (!jvmti_agent) { + warnx("jvmti: open_agent failed"); + return -1; + } + + /* + * Request a JVMTI interface version 1 environment + */ + ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1); + if (ret != JNI_OK) { + warnx("jvmti: jvmti version 1 not supported"); + return -1; + } + + /* + * acquire method_load capability, we require it + * request line numbers (optional) + */ + memset(&caps1, 0, sizeof(caps1)); + caps1.can_generate_compiled_method_load_events = 1; + + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: acquire compiled_method capability failed"); + return -1; + } + ret = (*jvmti)->GetJLocationFormat(jvmti, &format); + if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) { + memset(&caps1, 0, sizeof(caps1)); + caps1.can_get_line_numbers = 1; + caps1.can_get_source_file_name = 1; + ret = (*jvmti)->AddCapabilities(jvmti, &caps1); + if (ret == JVMTI_ERROR_NONE) + has_line_numbers = 1; + } + + memset(&cb, 0, sizeof(cb)); + + cb.CompiledMethodLoad = compiled_method_load_cb; + cb.DynamicCodeGenerated = code_generated_cb; + + ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb)); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: cannot set event callbacks"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed for method_load"); + return -1; + } + + ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE, + JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL); + if (ret != JVMTI_ERROR_NONE) { + warnx("jvmti: setnotification failed on code_generated"); + return -1; + } + return 0; +} + +JNIEXPORT void JNICALL +Agent_OnUnload(JavaVM *jvm __unused) +{ + int ret; + + ret = jvmti_close(jvmti_agent); + if (ret) + errx(1, "Error: op_close_agent()"); +} diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3d4c7c09adea..aaee0a782747 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -9,16 +9,18 @@ #include "builtin.h" #include "util/env.h" -#include "util/exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "util/cache.h" #include "util/quote.h" -#include "util/run-command.h" +#include <subcmd/run-command.h> #include "util/parse-events.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/bpf-loader.h" #include "util/debug.h" #include <api/fs/tracing_path.h> #include <pthread.h> +#include <stdlib.h> +#include <time.h> const char perf_usage_string[] = "perf [--version] [--help] [OPTIONS] COMMAND [ARGS]"; @@ -39,6 +41,7 @@ struct cmd_struct { static struct cmd_struct commands[] = { { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, + { "config", cmd_config, 0 }, { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, @@ -118,7 +121,7 @@ static void commit_pager_choice(void) { switch (use_pager) { case 0: - setenv("PERF_PAGER", "cat", 1); + setenv(PERF_PAGER_ENVIRONMENT, "cat", 1); break; case 1: /* setup_pager(); */ @@ -182,9 +185,9 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) if (!prefixcmp(cmd, CMD_EXEC_PATH)) { cmd += strlen(CMD_EXEC_PATH); if (*cmd == '=') - perf_set_argv_exec_path(cmd + 1); + set_argv_exec_path(cmd + 1); else { - puts(perf_exec_path()); + puts(get_argv_exec_path()); exit(0); } } else if (!strcmp(cmd, "--html-path")) { @@ -383,6 +386,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); + perf_env__set_cmdline(&perf_env, argc, argv); status = p->fn(argc, argv, prefix); exit_browser(status); perf_env__exit(&perf_env); @@ -450,11 +454,12 @@ static void handle_internal_command(int argc, const char **argv) static void execv_dashed_external(const char **argv) { - struct strbuf cmd = STRBUF_INIT; + char *cmd; const char *tmp; int status; - strbuf_addf(&cmd, "perf-%s", argv[0]); + if (asprintf(&cmd, "perf-%s", argv[0]) < 0) + goto do_die; /* * argv[0] must be the perf command, but the argv array @@ -463,7 +468,7 @@ static void execv_dashed_external(const char **argv) * restore it on error. */ tmp = argv[0]; - argv[0] = cmd.buf; + argv[0] = cmd; /* * if we fail because the command is not found, it is @@ -471,15 +476,16 @@ static void execv_dashed_external(const char **argv) */ status = run_command_v_opt(argv, 0); if (status != -ERR_RUN_COMMAND_EXEC) { - if (IS_RUN_COMMAND_ERR(status)) + if (IS_RUN_COMMAND_ERR(status)) { +do_die: die("unable to run '%s'", argv[0]); + } exit(-status); } errno = ENOENT; /* as if we called execvp */ argv[0] = tmp; - - strbuf_release(&cmd); + zfree(&cmd); } static int run_argv(int *argcp, const char ***argv) @@ -528,14 +534,22 @@ int main(int argc, const char **argv) const char *cmd; char sbuf[STRERR_BUFSIZE]; + /* libsubcmd init */ + exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); + pager_init(PERF_PAGER_ENVIRONMENT); + /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); - cmd = perf_extract_argv0_path(argv[0]); + cmd = extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; + srandom(time(NULL)); + + perf_config(perf_default_config, NULL); + /* get debugfs/tracefs mount point from /proc/mounts */ tracing_path_mount(); @@ -603,6 +617,8 @@ int main(int argc, const char **argv) */ pthread__block_sigwinch(); + perf_debug_setup(); + while (1) { static int done_help; int was_alias = run_argv(&argc, &argv); diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 90129accffbe..5381a01c0610 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -58,6 +58,8 @@ struct record_opts { bool full_auxtrace; bool auxtrace_snapshot_mode; bool record_switch_events; + bool all_kernel; + bool all_user; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index 15c8400240fd..1d95009592eb 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -71,7 +71,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names" + print "Install the audit-libs-python package to get syscall names.\n" \ + "For example:\n # apt-get install python-audit (Ubuntu)" \ + "\n # yum install audit-libs-python (Fedora)" \ + "\n etc.\n" def syscall_name(id): try: diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py new file mode 100644 index 000000000000..8b60f343dd07 --- /dev/null +++ b/tools/perf/scripts/python/stat-cpi.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +data = {} +times = [] +threads = [] +cpus = [] + +def get_key(time, event, cpu, thread): + return "%d-%s-%d-%d" % (time, event, cpu, thread) + +def store_key(time, cpu, thread): + if (time not in times): + times.append(time) + + if (cpu not in cpus): + cpus.append(cpu) + + if (thread not in threads): + threads.append(thread) + +def store(time, event, cpu, thread, val, ena, run): + #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \ + # (event, cpu, thread, time, val, ena, run) + + store_key(time, cpu, thread) + key = get_key(time, event, cpu, thread) + data[key] = [ val, ena, run] + +def get(time, event, cpu, thread): + key = get_key(time, event, cpu, thread) + return data[key][0] + +def stat__cycles_k(cpu, thread, time, val, ena, run): + store(time, "cycles", cpu, thread, val, ena, run); + +def stat__instructions_k(cpu, thread, time, val, ena, run): + store(time, "instructions", cpu, thread, val, ena, run); + +def stat__cycles_u(cpu, thread, time, val, ena, run): + store(time, "cycles", cpu, thread, val, ena, run); + +def stat__instructions_u(cpu, thread, time, val, ena, run): + store(time, "instructions", cpu, thread, val, ena, run); + +def stat__cycles(cpu, thread, time, val, ena, run): + store(time, "cycles", cpu, thread, val, ena, run); + +def stat__instructions(cpu, thread, time, val, ena, run): + store(time, "instructions", cpu, thread, val, ena, run); + +def stat__interval(time): + for cpu in cpus: + for thread in threads: + cyc = get(time, "cycles", cpu, thread) + ins = get(time, "instructions", cpu, thread) + cpi = 0 + + if ins != 0: + cpi = cyc/float(ins) + + print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins) + +def trace_end(): + pass +# XXX trace_end callback could be used as an alternative place +# to compute same values as in the script above: +# +# for time in times: +# for cpu in cpus: +# for thread in threads: +# cyc = get(time, "cycles", cpu, thread) +# ins = get(time, "instructions", cpu, thread) +# +# if ins != 0: +# cpi = cyc/float(ins) +# +# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi) diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore new file mode 100644 index 000000000000..8cc30e731c73 --- /dev/null +++ b/tools/perf/tests/.gitignore @@ -0,0 +1,4 @@ +llvm-src-base.c +llvm-src-kbuild.c +llvm-src-prologue.c +llvm-src-relocation.c diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 50de2253cff6..1ba628ed049a 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -31,8 +31,40 @@ perf-y += sample-parsing.o perf-y += parse-no-sample-id-all.o perf-y += kmod-path.o perf-y += thread-map.o -perf-y += llvm.o +perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o +perf-y += bpf.o perf-y += topology.o +perf-y += cpumap.o +perf-y += stat.o +perf-y += event_update.o + +$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include <tests/llvm.h>' > $@ + $(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ + +$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include <tests/llvm.h>' > $@ + $(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ + +$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include <tests/llvm.h>' > $@ + $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ + +$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build + $(call rule_mkdir) + $(Q)echo '#include <tests/llvm.h>' > $@ + $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@ + $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@ + $(Q)echo ';' >> $@ ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 2dfc9ad0e6f2..28d1605b0338 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -24,7 +24,7 @@ #include <linux/kernel.h> #include "../perf.h" #include "util.h" -#include "exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "tests.h" #define ENV "PERF_TEST_ATTR" @@ -153,7 +153,7 @@ static int run_dir(const char *d, const char *perf) return system(cmd); } -int test__attr(void) +int test__attr(int subtest __maybe_unused) { struct stat st; char path_perf[PATH_MAX]; @@ -164,13 +164,12 @@ int test__attr(void) return run_dir("./tests", "./perf"); /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", perf_exec_path()); + snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path()); snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); if (!lstat(path_dir, &st) && !lstat(path_perf, &st)) return run_dir(path_dir, path_perf); - fprintf(stderr, " (omitted)"); - return 0; + return TEST_SKIP; } diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index a02b035fd5aa..e7664fe3bd33 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -29,14 +29,59 @@ static int fd1; static int fd2; +static int fd3; static int overflows; +static int overflows_2; + +volatile long the_var; + + +/* + * Use ASM to ensure watchpoint and breakpoint can be triggered + * at one instruction. + */ +#if defined (__x86_64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "incq (%rdi)\n" + "ret\n"); +#elif defined (__aarch64__) +extern void __test_function(volatile long *ptr); +asm ( + ".globl __test_function\n" + "__test_function:\n" + "str x30, [x0]\n" + "ret\n"); + +#else +static void __test_function(volatile long *ptr) +{ + *ptr = 0x1234; +} +#endif __attribute__ ((noinline)) static int test_function(void) { + __test_function(&the_var); + the_var++; return time(NULL); } +static void sig_handler_2(int signum __maybe_unused, + siginfo_t *oh __maybe_unused, + void *uc __maybe_unused) +{ + overflows_2++; + if (overflows_2 > 10) { + ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); + } +} + static void sig_handler(int signum __maybe_unused, siginfo_t *oh __maybe_unused, void *uc __maybe_unused) @@ -54,10 +99,11 @@ static void sig_handler(int signum __maybe_unused, */ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); } } -static int bp_event(void *fn, int setup_signal) +static int __event(bool is_x, void *addr, int sig) { struct perf_event_attr pe; int fd; @@ -67,8 +113,8 @@ static int bp_event(void *fn, int setup_signal) pe.size = sizeof(struct perf_event_attr); pe.config = 0; - pe.bp_type = HW_BREAKPOINT_X; - pe.bp_addr = (unsigned long) fn; + pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W; + pe.bp_addr = (unsigned long) addr; pe.bp_len = sizeof(long); pe.sample_period = 1; @@ -86,17 +132,25 @@ static int bp_event(void *fn, int setup_signal) return TEST_FAIL; } - if (setup_signal) { - fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); - fcntl(fd, F_SETSIG, SIGIO); - fcntl(fd, F_SETOWN, getpid()); - } + fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); + fcntl(fd, F_SETSIG, sig); + fcntl(fd, F_SETOWN, getpid()); ioctl(fd, PERF_EVENT_IOC_RESET, 0); return fd; } +static int bp_event(void *addr, int sig) +{ + return __event(true, addr, sig); +} + +static int wp_event(void *addr, int sig) +{ + return __event(false, addr, sig); +} + static long long bp_count(int fd) { long long count; @@ -111,10 +165,10 @@ static long long bp_count(int fd) return count; } -int test__bp_signal(void) +int test__bp_signal(int subtest __maybe_unused) { struct sigaction sa; - long long count1, count2; + long long count1, count2, count3; /* setup SIGIO signal handler */ memset(&sa, 0, sizeof(struct sigaction)); @@ -126,21 +180,52 @@ int test__bp_signal(void) return TEST_FAIL; } + sa.sa_sigaction = (void *) sig_handler_2; + if (sigaction(SIGUSR1, &sa, NULL) < 0) { + pr_debug("failed setting up signal handler 2\n"); + return TEST_FAIL; + } + /* * We create following events: * - * fd1 - breakpoint event on test_function with SIGIO + * fd1 - breakpoint event on __test_function with SIGIO * signal configured. We should get signal * notification each time the breakpoint is hit * - * fd2 - breakpoint event on sig_handler without SIGIO + * fd2 - breakpoint event on sig_handler with SIGUSR1 + * configured. We should get SIGUSR1 each time when + * breakpoint is hit + * + * fd3 - watchpoint event on __test_function with SIGIO * configured. * * Following processing should happen: - * - execute test_function - * - fd1 event breakpoint hit -> count1 == 1 - * - SIGIO is delivered -> overflows == 1 - * - fd2 event breakpoint hit -> count2 == 1 + * Exec: Action: Result: + * incq (%rdi) - fd1 event breakpoint hit -> count1 == 1 + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 1 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 1 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 1 + * sys_rt_sigreturn - return from sig_handler + * incq (%rdi) - fd3 event watchpoint hit -> count3 == 1 (wp and bp in one insn) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 2 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 2 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows = 2 + * sys_rt_sigreturn - return from sig_handler + * the_var++ - fd3 event watchpoint hit -> count3 == 2 (standalone watchpoint) + * - SIGIO is delivered + * sig_handler - fd2 event breakpoint hit -> count2 == 3 + * - SIGUSR1 is delivered + * sig_handler_2 -> overflows_2 == 3 (nested signal) + * sys_rt_sigreturn - return from sig_handler_2 + * overflows++ -> overflows == 3 + * sys_rt_sigreturn - return from sig_handler * * The test case check following error conditions: * - we get stuck in signal handler because of debug @@ -152,11 +237,13 @@ int test__bp_signal(void) * */ - fd1 = bp_event(test_function, 1); - fd2 = bp_event(sig_handler, 0); + fd1 = bp_event(__test_function, SIGIO); + fd2 = bp_event(sig_handler, SIGUSR1); + fd3 = wp_event((void *)&the_var, SIGIO); ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0); ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0); /* * Kick off the test by trigering 'fd1' @@ -166,15 +253,18 @@ int test__bp_signal(void) ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0); + ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0); count1 = bp_count(fd1); count2 = bp_count(fd2); + count3 = bp_count(fd3); close(fd1); close(fd2); + close(fd3); - pr_debug("count1 %lld, count2 %lld, overflow %d\n", - count1, count2, overflows); + pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n", + count1, count2, count3, overflows, overflows_2); if (count1 != 1) { if (count1 == 11) @@ -183,12 +273,18 @@ int test__bp_signal(void) pr_debug("failed: wrong count for bp1%lld\n", count1); } - if (overflows != 1) + if (overflows != 3) pr_debug("failed: wrong overflow hit\n"); - if (count2 != 1) + if (overflows_2 != 3) + pr_debug("failed: wrong overflow_2 hit\n"); + + if (count2 != 3) pr_debug("failed: wrong count for bp2\n"); - return count1 == 1 && overflows == 1 && count2 == 1 ? + if (count3 != 2) + pr_debug("failed: wrong count for bp3\n"); + + return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ? TEST_OK : TEST_FAIL; } diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c index e76537724491..89f92fa67cc4 100644 --- a/tools/perf/tests/bp_signal_overflow.c +++ b/tools/perf/tests/bp_signal_overflow.c @@ -58,7 +58,7 @@ static long long bp_count(int fd) #define EXECUTIONS 10000 #define THRESHOLD 100 -int test__bp_signal_overflow(void) +int test__bp_signal_overflow(int subtest __maybe_unused) { struct perf_event_attr pe; struct sigaction sa; diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 410a70b93b93..0ec9c2c03164 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -1,3 +1,7 @@ +/* + * bpf-script-example.c + * Test basic LLVM building + */ #ifndef LINUX_VERSION_CODE # error Need LINUX_VERSION_CODE # error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c new file mode 100644 index 000000000000..3626924740d8 --- /dev/null +++ b/tools/perf/tests/bpf-script-test-kbuild.c @@ -0,0 +1,21 @@ +/* + * bpf-script-test-kbuild.c + * Test include from kernel header + */ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define SEC(NAME) __attribute__((section(NAME), used)) + +#include <uapi/linux/fs.h> +#include <uapi/asm/ptrace.h> + +SEC("func=vfs_llseek") +int bpf_func__vfs_llseek(void *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c new file mode 100644 index 000000000000..7230e62c70fc --- /dev/null +++ b/tools/perf/tests/bpf-script-test-prologue.c @@ -0,0 +1,35 @@ +/* + * bpf-script-test-prologue.c + * Test BPF prologue + */ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define SEC(NAME) __attribute__((section(NAME), used)) + +#include <uapi/linux/fs.h> + +#define FMODE_READ 0x1 +#define FMODE_WRITE 0x2 + +static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) = + (void *) 6; + +SEC("func=null_lseek file->f_mode offset orig") +int bpf_func__null_lseek(void *ctx, int err, unsigned long f_mode, + unsigned long offset, unsigned long orig) +{ + if (err) + return 0; + if (f_mode & FMODE_WRITE) + return 0; + if (offset & 1) + return 0; + if (orig == SEEK_CUR) + return 0; + return 1; +} + +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c new file mode 100644 index 000000000000..93af77421816 --- /dev/null +++ b/tools/perf/tests/bpf-script-test-relocation.c @@ -0,0 +1,50 @@ +/* + * bpf-script-test-relocation.c + * Test BPF loader checking relocation + */ +#ifndef LINUX_VERSION_CODE +# error Need LINUX_VERSION_CODE +# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200" into llvm section of ~/.perfconfig' +#endif +#define BPF_ANY 0 +#define BPF_MAP_TYPE_ARRAY 2 +#define BPF_FUNC_map_lookup_elem 1 +#define BPF_FUNC_map_update_elem 2 + +static void *(*bpf_map_lookup_elem)(void *map, void *key) = + (void *) BPF_FUNC_map_lookup_elem; +static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) = + (void *) BPF_FUNC_map_update_elem; + +struct bpf_map_def { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; +}; + +#define SEC(NAME) __attribute__((section(NAME), used)) +struct bpf_map_def SEC("maps") my_table = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 1, +}; + +int this_is_a_global_val; + +SEC("func=sys_write") +int bpf_func__sys_write(void *ctx) +{ + int key = 0; + int value = 0; + + /* + * Incorrect relocation. Should not allow this program be + * loaded into kernel. + */ + bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0); + return 0; +} +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = LINUX_VERSION_CODE; diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c new file mode 100644 index 000000000000..199501c71e27 --- /dev/null +++ b/tools/perf/tests/bpf.c @@ -0,0 +1,315 @@ +#include <stdio.h> +#include <sys/epoll.h> +#include <util/util.h> +#include <util/bpf-loader.h> +#include <util/evlist.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <bpf/bpf.h> +#include "tests.h" +#include "llvm.h" +#include "debug.h" +#define NR_ITERS 111 + +#ifdef HAVE_LIBBPF_SUPPORT + +static int epoll_pwait_loop(void) +{ + int i; + + /* Should fail NR_ITERS times */ + for (i = 0; i < NR_ITERS; i++) + epoll_pwait(-(i + 1), NULL, 0, 0, NULL); + return 0; +} + +#ifdef HAVE_BPF_PROLOGUE + +static int llseek_loop(void) +{ + int fds[2], i; + + fds[0] = open("/dev/null", O_RDONLY); + fds[1] = open("/dev/null", O_RDWR); + + if (fds[0] < 0 || fds[1] < 0) + return -1; + + for (i = 0; i < NR_ITERS; i++) { + lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET); + lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET); + } + close(fds[0]); + close(fds[1]); + return 0; +} + +#endif + +static struct { + enum test_llvm__testcase prog_id; + const char *desc; + const char *name; + const char *msg_compile_fail; + const char *msg_load_fail; + int (*target_func)(void); + int expect_result; +} bpf_testcase_table[] = { + { + LLVM_TESTCASE_BASE, + "Test basic BPF filtering", + "[basic_bpf_test]", + "fix 'perf test LLVM' first", + "load bpf object failed", + &epoll_pwait_loop, + (NR_ITERS + 1) / 2, + }, +#ifdef HAVE_BPF_PROLOGUE + { + LLVM_TESTCASE_BPF_PROLOGUE, + "Test BPF prologue generation", + "[bpf_prologue_test]", + "fix kbuild first", + "check your vmlinux setting?", + &llseek_loop, + (NR_ITERS + 1) / 4, + }, +#endif + { + LLVM_TESTCASE_BPF_RELOCATION, + "Test BPF relocation checker", + "[bpf_relocation_test]", + "fix 'perf test LLVM' first", + "libbpf error when dealing with relocation", + NULL, + 0, + }, +}; + +static int do_test(struct bpf_object *obj, int (*func)(void), + int expect) +{ + struct record_opts opts = { + .target = { + .uid = UINT_MAX, + .uses_mmap = true, + }, + .freq = 0, + .mmap_pages = 256, + .default_interval = 1, + }; + + char pid[16]; + char sbuf[STRERR_BUFSIZE]; + struct perf_evlist *evlist; + int i, ret = TEST_FAIL, err = 0, count = 0; + + struct parse_events_evlist parse_evlist; + struct parse_events_error parse_error; + + bzero(&parse_error, sizeof(parse_error)); + bzero(&parse_evlist, sizeof(parse_evlist)); + parse_evlist.error = &parse_error; + INIT_LIST_HEAD(&parse_evlist.list); + + err = parse_events_load_bpf_obj(&parse_evlist, &parse_evlist.list, obj, NULL); + if (err || list_empty(&parse_evlist.list)) { + pr_debug("Failed to add events selected by BPF\n"); + return TEST_FAIL; + } + + snprintf(pid, sizeof(pid), "%d", getpid()); + pid[sizeof(pid) - 1] = '\0'; + opts.target.tid = opts.target.pid = pid; + + /* Instead of perf_evlist__new_default, don't add default events */ + evlist = perf_evlist__new(); + if (!evlist) { + pr_debug("No ehough memory to create evlist\n"); + return TEST_FAIL; + } + + err = perf_evlist__create_maps(evlist, &opts.target); + if (err < 0) { + pr_debug("Not enough memory to create thread/cpu maps\n"); + goto out_delete_evlist; + } + + perf_evlist__splice_list_tail(evlist, &parse_evlist.list); + evlist->nr_groups = parse_evlist.nr_groups; + + perf_evlist__config(evlist, &opts); + + err = perf_evlist__open(evlist); + if (err < 0) { + pr_debug("perf_evlist__open: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + goto out_delete_evlist; + } + + err = perf_evlist__mmap(evlist, opts.mmap_pages, false); + if (err < 0) { + pr_debug("perf_evlist__mmap: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + goto out_delete_evlist; + } + + perf_evlist__enable(evlist); + (*func)(); + perf_evlist__disable(evlist); + + for (i = 0; i < evlist->nr_mmaps; i++) { + union perf_event *event; + + while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { + const u32 type = event->header.type; + + if (type == PERF_RECORD_SAMPLE) + count ++; + } + } + + if (count != expect) { + pr_debug("BPF filter result incorrect\n"); + goto out_delete_evlist; + } + + ret = TEST_OK; + +out_delete_evlist: + perf_evlist__delete(evlist); + return ret; +} + +static struct bpf_object * +prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name) +{ + struct bpf_object *obj; + + obj = bpf__prepare_load_buffer(obj_buf, obj_buf_sz, name); + if (IS_ERR(obj)) { + pr_debug("Compile BPF program failed.\n"); + return NULL; + } + return obj; +} + +static int __test__bpf(int idx) +{ + int ret; + void *obj_buf; + size_t obj_buf_sz; + struct bpf_object *obj; + + ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, + bpf_testcase_table[idx].prog_id, + true, NULL); + if (ret != TEST_OK || !obj_buf || !obj_buf_sz) { + pr_debug("Unable to get BPF object, %s\n", + bpf_testcase_table[idx].msg_compile_fail); + if (idx == 0) + return TEST_SKIP; + else + return TEST_FAIL; + } + + obj = prepare_bpf(obj_buf, obj_buf_sz, + bpf_testcase_table[idx].name); + if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) { + if (!obj) + pr_debug("Fail to load BPF object: %s\n", + bpf_testcase_table[idx].msg_load_fail); + else + pr_debug("Success unexpectedly: %s\n", + bpf_testcase_table[idx].msg_load_fail); + ret = TEST_FAIL; + goto out; + } + + if (obj) + ret = do_test(obj, + bpf_testcase_table[idx].target_func, + bpf_testcase_table[idx].expect_result); +out: + bpf__clear(); + return ret; +} + +int test__bpf_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(bpf_testcase_table); +} + +const char *test__bpf_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table)) + return NULL; + return bpf_testcase_table[i].desc; +} + +static int check_env(void) +{ + int err; + unsigned int kver_int; + char license[] = "GPL"; + + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }; + + err = fetch_kernel_version(&kver_int, NULL, 0); + if (err) { + pr_debug("Unable to get kernel version\n"); + return err; + } + + err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns, + sizeof(insns) / sizeof(insns[0]), + license, kver_int, NULL, 0); + if (err < 0) { + pr_err("Missing basic BPF support, skip this test: %s\n", + strerror(errno)); + return err; + } + close(err); + + return 0; +} + +int test__bpf(int i) +{ + int err; + + if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table)) + return TEST_FAIL; + + if (geteuid() != 0) { + pr_debug("Only root can run BPF test\n"); + return TEST_SKIP; + } + + if (check_env()) + return TEST_SKIP; + + err = __test__bpf(i); + return err; +} + +#else +int test__bpf_subtest_get_nr(void) +{ + return 0; +} + +const char *test__bpf_subtest_get_desc(int i __maybe_unused) +{ + return NULL; +} + +int test__bpf(int i __maybe_unused) +{ + pr_debug("Skip BPF test because BPF support is not compiled\n"); + return TEST_SKIP; +} +#endif diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 66f72d3d6677..f2b1dcac45d3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -11,7 +11,7 @@ #include "tests.h" #include "debug.h" #include "color.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include "symbol.h" struct test __weak arch_tests[] = { @@ -160,12 +160,50 @@ static struct test generic_tests[] = { { .desc = "Test LLVM searching and compiling", .func = test__llvm, + .subtest = { + .skip_if_fail = true, + .get_nr = test__llvm_subtest_get_nr, + .get_desc = test__llvm_subtest_get_desc, + }, }, { .desc = "Test topology in session", .func = test_session_topology, }, { + .desc = "Test BPF filter", + .func = test__bpf, + .subtest = { + .skip_if_fail = true, + .get_nr = test__bpf_subtest_get_nr, + .get_desc = test__bpf_subtest_get_desc, + }, + }, + { + .desc = "Test thread map synthesize", + .func = test__thread_map_synthesize, + }, + { + .desc = "Test cpu map synthesize", + .func = test__cpu_map_synthesize, + }, + { + .desc = "Test stat config synthesize", + .func = test__synthesize_stat_config, + }, + { + .desc = "Test stat synthesize", + .func = test__synthesize_stat, + }, + { + .desc = "Test stat round synthesize", + .func = test__synthesize_stat_round, + }, + { + .desc = "Test attr update synthesize", + .func = test__event_update, + }, + { .func = NULL, }, }; @@ -192,14 +230,14 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char continue; } - if (strstr(test->desc, argv[i])) + if (strcasestr(test->desc, argv[i])) return true; } return false; } -static int run_test(struct test *test) +static int run_test(struct test *test, int subtest) { int status, err = -1, child = fork(); char sbuf[STRERR_BUFSIZE]; @@ -212,7 +250,22 @@ static int run_test(struct test *test) if (!child) { pr_debug("test child forked, pid %d\n", getpid()); - err = test->func(); + if (!verbose) { + int nullfd = open("/dev/null", O_WRONLY); + if (nullfd >= 0) { + close(STDERR_FILENO); + close(STDOUT_FILENO); + + dup2(nullfd, STDOUT_FILENO); + dup2(STDOUT_FILENO, STDERR_FILENO); + close(nullfd); + } + } else { + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); + } + + err = test->func(subtest); exit(err); } @@ -233,6 +286,40 @@ static int run_test(struct test *test) for (j = 0; j < ARRAY_SIZE(tests); j++) \ for (t = &tests[j][0]; t->func; t++) +static int test_and_print(struct test *t, bool force_skip, int subtest) +{ + int err; + + if (!force_skip) { + pr_debug("\n--- start ---\n"); + err = run_test(t, subtest); + pr_debug("---- end ----\n"); + } else { + pr_debug("\n--- force skipped ---\n"); + err = TEST_SKIP; + } + + if (!t->subtest.get_nr) + pr_debug("%s:", t->desc); + else + pr_debug("%s subtest %d:", t->desc, subtest); + + switch (err) { + case TEST_OK: + pr_info(" Ok\n"); + break; + case TEST_SKIP: + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); + break; + case TEST_FAIL: + default: + color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n"); + break; + } + + return err; +} + static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) { struct test *t; @@ -260,21 +347,43 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) continue; } - pr_debug("\n--- start ---\n"); - err = run_test(t); - pr_debug("---- end ----\n%s:", t->desc); - - switch (err) { - case TEST_OK: - pr_info(" Ok\n"); - break; - case TEST_SKIP: - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n"); - break; - case TEST_FAIL: - default: - color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n"); - break; + if (!t->subtest.get_nr) { + test_and_print(t, false, -1); + } else { + int subn = t->subtest.get_nr(); + /* + * minus 2 to align with normal testcases. + * For subtest we print additional '.x' in number. + * for example: + * + * 35: Test LLVM searching and compiling : + * 35.1: Basic BPF llvm compiling test : Ok + */ + int subw = width > 2 ? width - 2 : width; + bool skip = false; + int subi; + + if (subn <= 0) { + color_fprintf(stderr, PERF_COLOR_YELLOW, + " Skip (not compiled in)\n"); + continue; + } + pr_info("\n"); + + for (subi = 0; subi < subn; subi++) { + int len = strlen(t->subtest.get_desc(subi)); + + if (subw < len) + subw = len; + } + + for (subi = 0; subi < subn; subi++) { + pr_info("%2d.%1d: %-*s:", i, subi + 1, subw, + t->subtest.get_desc(subi)); + err = test_and_print(t, skip, subi); + if (err != TEST_OK && t->subtest.skip_if_fail) + skip = true; + } } } diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 49b1959dda41..afc9ad0a0515 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -433,14 +433,13 @@ enum { static int do_test_code_reading(bool try_kcore) { - struct machines machines; struct machine *machine; struct thread *thread; struct record_opts opts = { .mmap_pages = UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, - .freq = 4000, + .freq = 500, .target = { .uses_mmap = true, }, @@ -459,8 +458,7 @@ static int do_test_code_reading(bool try_kcore) pid = getpid(); - machines__init(&machines); - machine = &machines.host; + machine = machine__new_host(); ret = machine__create_kernel_maps(machine); if (ret < 0) { @@ -549,12 +547,25 @@ static int do_test_code_reading(bool try_kcore) if (ret < 0) { if (!excl_kernel) { excl_kernel = true; + /* + * Both cpus and threads are now owned by evlist + * and will be freed by following perf_evlist__set_maps + * call. Getting refference to keep them alive. + */ + cpu_map__get(cpus); + thread_map__get(threads); perf_evlist__set_maps(evlist, NULL, NULL); perf_evlist__delete(evlist); evlist = NULL; continue; } - pr_debug("perf_evlist__open failed\n"); + + if (verbose) { + char errbuf[512]; + perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); + pr_debug("perf_evlist__open() failed!\n%s\n", errbuf); + } + goto out_put; } break; @@ -594,14 +605,13 @@ out_err: cpu_map__put(cpus); thread_map__put(threads); } - machines__destroy_kernel_maps(&machines); machine__delete_threads(machine); - machines__exit(&machines); + machine__delete(machine); return err; } -int test__code_reading(void) +int test__code_reading(int subtest __maybe_unused) { int ret; @@ -613,16 +623,16 @@ int test__code_reading(void) case TEST_CODE_READING_OK: return 0; case TEST_CODE_READING_NO_VMLINUX: - fprintf(stderr, " (no vmlinux)"); + pr_debug("no vmlinux\n"); return 0; case TEST_CODE_READING_NO_KCORE: - fprintf(stderr, " (no kcore)"); + pr_debug("no kcore\n"); return 0; case TEST_CODE_READING_NO_ACCESS: - fprintf(stderr, " (no access)"); + pr_debug("no access\n"); return 0; case TEST_CODE_READING_NO_KERNEL_OBJ: - fprintf(stderr, " (no kernel obj)"); + pr_debug("no kernel obj\n"); return 0; default: return -1; diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c new file mode 100644 index 000000000000..4cb6418a8ffc --- /dev/null +++ b/tools/perf/tests/cpumap.c @@ -0,0 +1,88 @@ +#include "tests.h" +#include "cpumap.h" + +static int process_event_mask(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct cpu_map_event *map_event = &event->cpu_map; + struct cpu_map_mask *mask; + struct cpu_map_data *data; + struct cpu_map *map; + int i; + + data = &map_event->data; + + TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK); + + mask = (struct cpu_map_mask *)data->data; + + TEST_ASSERT_VAL("wrong nr", mask->nr == 1); + + for (i = 0; i < 20; i++) { + TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask)); + } + + map = cpu_map__new_data(data); + TEST_ASSERT_VAL("wrong nr", map->nr == 20); + + for (i = 0; i < 20; i++) { + TEST_ASSERT_VAL("wrong cpu", map->map[i] == i); + } + + cpu_map__put(map); + return 0; +} + +static int process_event_cpus(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct cpu_map_event *map_event = &event->cpu_map; + struct cpu_map_entries *cpus; + struct cpu_map_data *data; + struct cpu_map *map; + + data = &map_event->data; + + TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS); + + cpus = (struct cpu_map_entries *)data->data; + + TEST_ASSERT_VAL("wrong nr", cpus->nr == 2); + TEST_ASSERT_VAL("wrong cpu", cpus->cpu[0] == 1); + TEST_ASSERT_VAL("wrong cpu", cpus->cpu[1] == 256); + + map = cpu_map__new_data(data); + TEST_ASSERT_VAL("wrong nr", map->nr == 2); + TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1); + TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256); + TEST_ASSERT_VAL("wrong refcnt", atomic_read(&map->refcnt) == 1); + cpu_map__put(map); + return 0; +} + + +int test__cpu_map_synthesize(int subtest __maybe_unused) +{ + struct cpu_map *cpus; + + /* This one is better stores in mask. */ + cpus = cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19"); + + TEST_ASSERT_VAL("failed to synthesize map", + !perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL)); + + cpu_map__put(cpus); + + /* This one is better stores in cpu values. */ + cpus = cpu_map__new("1,256"); + + TEST_ASSERT_VAL("failed to synthesize map", + !perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL)); + + cpu_map__put(cpus); + return 0; +} diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index a218aeaf56a0..dc673ff7c437 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -110,7 +110,7 @@ static int dso__data_fd(struct dso *dso, struct machine *machine) return fd; } -int test__dso_data(void) +int test__dso_data(int subtest __maybe_unused) { struct machine machine; struct dso *dso; @@ -245,7 +245,7 @@ static int set_fd_limit(int n) return setrlimit(RLIMIT_NOFILE, &rlim); } -int test__dso_data_cache(void) +int test__dso_data_cache(int subtest __maybe_unused) { struct machine machine; long nr_end, nr = open_files_cnt(); @@ -302,7 +302,7 @@ int test__dso_data_cache(void) return 0; } -int test__dso_data_reopen(void) +int test__dso_data_reopen(int subtest __maybe_unused) { struct machine machine; long nr_end, nr = open_files_cnt(); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 07221793a3ac..1c5c0221cea2 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -51,6 +51,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) "krava_1", "test__dwarf_unwind" }; + /* + * The funcs[MAX_STACK] array index, based on the + * callchain order setup. + */ + int idx = callchain_param.order == ORDER_CALLER ? + MAX_STACK - *cnt - 1 : *cnt; if (*cnt >= MAX_STACK) { pr_debug("failed: crossed the max stack value %d\n", MAX_STACK); @@ -63,8 +69,10 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return -1; } - pr_debug("got: %s 0x%" PRIx64 "\n", symbol, entry->ip); - return strcmp((const char *) symbol, funcs[(*cnt)++]); + (*cnt)++; + pr_debug("got: %s 0x%" PRIx64 ", expecting %s\n", + symbol, entry->ip, funcs[idx]); + return strcmp((const char *) symbol, funcs[idx]); } __attribute__ ((noinline)) @@ -105,8 +113,16 @@ static int compare(void *p1, void *p2) /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; - if (global_unwind_retval == -INT_MAX) + if (global_unwind_retval == -INT_MAX) { + /* Call unwinder twice for both callchain orders. */ + callchain_param.order = ORDER_CALLER; + global_unwind_retval = unwind_thread(thread); + if (!global_unwind_retval) { + callchain_param.order = ORDER_CALLEE; + global_unwind_retval = unwind_thread(thread); + } + } return p1 - p2; } @@ -142,21 +158,23 @@ static int krava_1(struct thread *thread) return krava_2(thread); } -int test__dwarf_unwind(void) +int test__dwarf_unwind(int subtest __maybe_unused) { - struct machines machines; struct machine *machine; struct thread *thread; int err = -1; - machines__init(&machines); - - machine = machines__find(&machines, HOST_KERNEL_ID); + machine = machine__new_host(); if (!machine) { pr_err("Could not get machine\n"); return -1; } + if (machine__create_kernel_maps(machine)) { + pr_err("Failed to create kernel maps\n"); + return -1; + } + callchain_param.record_mode = CALLCHAIN_DWARF; if (init_live_machine(machine)) { @@ -178,7 +196,6 @@ int test__dwarf_unwind(void) out: machine__delete_threads(machine); - machine__exit(machine); - machines__exit(&machines); + machine__delete(machine); return err; } diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c new file mode 100644 index 000000000000..012eab5d1df1 --- /dev/null +++ b/tools/perf/tests/event_update.c @@ -0,0 +1,117 @@ +#include <linux/compiler.h> +#include "evlist.h" +#include "evsel.h" +#include "machine.h" +#include "tests.h" +#include "debug.h" + +static int process_event_unit(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct event_update_event *ev = (struct event_update_event *) event; + + TEST_ASSERT_VAL("wrong id", ev->id == 123); + TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__UNIT); + TEST_ASSERT_VAL("wrong unit", !strcmp(ev->data, "KRAVA")); + return 0; +} + +static int process_event_scale(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct event_update_event *ev = (struct event_update_event *) event; + struct event_update_event_scale *ev_data; + + ev_data = (struct event_update_event_scale *) ev->data; + + TEST_ASSERT_VAL("wrong id", ev->id == 123); + TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE); + TEST_ASSERT_VAL("wrong scale", ev_data->scale = 0.123); + return 0; +} + +struct event_name { + struct perf_tool tool; + const char *name; +}; + +static int process_event_name(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct event_name *tmp = container_of(tool, struct event_name, tool); + struct event_update_event *ev = (struct event_update_event*) event; + + TEST_ASSERT_VAL("wrong id", ev->id == 123); + TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__NAME); + TEST_ASSERT_VAL("wrong name", !strcmp(ev->data, tmp->name)); + return 0; +} + +static int process_event_cpus(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct event_update_event *ev = (struct event_update_event*) event; + struct event_update_event_cpus *ev_data; + struct cpu_map *map; + + ev_data = (struct event_update_event_cpus*) ev->data; + + map = cpu_map__new_data(&ev_data->cpus); + + TEST_ASSERT_VAL("wrong id", ev->id == 123); + TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); + TEST_ASSERT_VAL("wrong cpus", map->nr == 3); + TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1); + TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2); + TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3); + cpu_map__put(map); + return 0; +} + +int test__event_update(int subtest __maybe_unused) +{ + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct event_name tmp; + + evlist = perf_evlist__new_default(); + TEST_ASSERT_VAL("failed to get evlist", evlist); + + evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("failed to allos ids", + !perf_evsel__alloc_id(evsel, 1, 1)); + + perf_evlist__id_add(evlist, evsel, 0, 0, 123); + + evsel->unit = strdup("KRAVA"); + + TEST_ASSERT_VAL("failed to synthesize attr update unit", + !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit)); + + evsel->scale = 0.123; + + TEST_ASSERT_VAL("failed to synthesize attr update scale", + !perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale)); + + tmp.name = perf_evsel__name(evsel); + + TEST_ASSERT_VAL("failed to synthesize attr update name", + !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name)); + + evsel->own_cpus = cpu_map__new("1,2,3"); + + TEST_ASSERT_VAL("failed to synthesize attr update cpus", + !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus)); + + cpu_map__put(evsel->own_cpus); + return 0; +} diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index 3fa715987a5e..2de4a4f2c3ed 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -95,7 +95,7 @@ out_delete_evlist: #define perf_evsel__name_array_test(names) \ __perf_evsel__name_array_test(names, ARRAY_SIZE(names)) -int test__perf_evsel__roundtrip_name_test(void) +int test__perf_evsel__roundtrip_name_test(int subtest __maybe_unused) { int err = 0, ret = 0; @@ -103,7 +103,8 @@ int test__perf_evsel__roundtrip_name_test(void) if (err) ret = err; - err = perf_evsel__name_array_test(perf_evsel__sw_names); + err = __perf_evsel__name_array_test(perf_evsel__sw_names, + PERF_COUNT_SW_DUMMY + 1); if (err) ret = err; diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 790e413d9a1f..1984b3bbfe15 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -32,7 +32,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return ret; } -int test__perf_evsel__tp_sched_test(void) +int test__perf_evsel__tp_sched_test(int subtest __maybe_unused) { struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch"); int ret = 0; diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c index d24b837951d4..c809463edbe5 100644 --- a/tools/perf/tests/fdarray.c +++ b/tools/perf/tests/fdarray.c @@ -25,7 +25,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE return printed + fdarray__fprintf(fda, fp); } -int test__fdarray__filter(void) +int test__fdarray__filter(int subtest __maybe_unused) { int nr_fds, expected_fd[2], fd, err = TEST_FAIL; struct fdarray *fda = fdarray__new(5, 5); @@ -103,7 +103,7 @@ out: return err; } -int test__fdarray__add(void) +int test__fdarray__add(int subtest __maybe_unused) { int err = TEST_FAIL; struct fdarray *fda = fdarray__new(2, 2); diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index ce80b274b097..071a8b5f5232 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -150,7 +150,6 @@ struct machine *setup_fake_machine(struct machines *machines) out: pr_debug("Not enough memory for machine setup\n"); machine__delete_threads(machine); - machine__delete(machine); return NULL; } diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 7ed737019de7..ecf136c385d5 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -191,7 +191,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec * function since TEST_ASSERT_VAL() returns in case of failure. */ hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(hists_to_evsel(hists), NULL); if (verbose > 2) { pr_info("use callchain: %d, cumulate callchain: %d\n", @@ -281,7 +281,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = false; perf_evsel__reset_sample_bit(evsel, CALLCHAIN); - setup_sorting(); + setup_sorting(NULL); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -428,7 +428,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = false; perf_evsel__set_sample_bit(evsel, CALLCHAIN); - setup_sorting(); + setup_sorting(NULL); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -486,7 +486,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = true; perf_evsel__reset_sample_bit(evsel, CALLCHAIN); - setup_sorting(); + setup_sorting(NULL); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -670,7 +670,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) symbol_conf.cumulate_callchain = true; perf_evsel__set_sample_bit(evsel, CALLCHAIN); - setup_sorting(); + setup_sorting(NULL); callchain_register_param(&callchain_param); err = add_hist_entries(hists, machine); @@ -686,7 +686,7 @@ out: return err; } -int test__hists_cumulate(void) +int test__hists_cumulate(int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; @@ -706,6 +706,7 @@ int test__hists_cumulate(void) err = parse_events(evlist, "cpu-clock", NULL); if (err) goto out; + err = TEST_FAIL; machines__init(&machines); diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 818acf875dd0..34b945a55d4d 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -104,7 +104,7 @@ out: return TEST_FAIL; } -int test__hists_filter(void) +int test__hists_filter(int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; @@ -120,9 +120,10 @@ int test__hists_filter(void) err = parse_events(evlist, "task-clock", NULL); if (err) goto out; + err = TEST_FAIL; /* default sort order (comm,dso,sym) will be used */ - if (setup_sorting() < 0) + if (setup_sorting(NULL) < 0) goto out; machines__init(&machines); @@ -144,7 +145,7 @@ int test__hists_filter(void) struct hists *hists = evsel__hists(evsel); hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("Normal histogram\n"); diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 8c102b011424..64b257d8d557 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -64,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) struct perf_evsel *evsel; struct addr_location al; struct hist_entry *he; - struct perf_sample sample = { .period = 1, }; + struct perf_sample sample = { .period = 1, .weight = 1, }; size_t i = 0, k; /* @@ -90,7 +90,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(hists, &al, NULL, - NULL, NULL, 1, 1, 0, true); + NULL, NULL, &sample, true); if (he == NULL) { addr_location__put(&al); goto out; @@ -116,7 +116,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) goto out; he = __hists__add_entry(hists, &al, NULL, - NULL, NULL, 1, 1, 0, true); + NULL, NULL, &sample, true); if (he == NULL) { addr_location__put(&al); goto out; @@ -274,7 +274,7 @@ static int validate_link(struct hists *leader, struct hists *other) return __validate_link(leader, 0) || __validate_link(other, 1); } -int test__hists_link(void) +int test__hists_link(int subtest __maybe_unused) { int err = -1; struct hists *hists, *first_hists; @@ -293,8 +293,9 @@ int test__hists_link(void) if (err) goto out; + err = TEST_FAIL; /* default sort order (comm,dso,sym) will be used */ - if (setup_sorting() < 0) + if (setup_sorting(NULL) < 0) goto out; machines__init(&machines); diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index adbebc852cc8..23cce67c7e48 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -134,7 +134,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) field_order = NULL; sort_order = NULL; /* equivalent to sort_order = "comm,dso,sym" */ - setup_sorting(); + setup_sorting(NULL); /* * expected output: @@ -156,7 +156,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -236,7 +236,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) field_order = "overhead,cpu"; sort_order = "pid"; - setup_sorting(); + setup_sorting(NULL); /* * expected output: @@ -256,7 +256,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -292,7 +292,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) field_order = "comm,overhead,dso"; sort_order = NULL; - setup_sorting(); + setup_sorting(NULL); /* * expected output: @@ -310,7 +310,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -366,7 +366,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) field_order = "dso,sym,comm,overhead,dso"; sort_order = "sym"; - setup_sorting(); + setup_sorting(NULL); /* * expected output: @@ -388,7 +388,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -468,7 +468,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) field_order = "cpu,pid,comm,dso,sym"; sort_order = "dso,pid"; - setup_sorting(); + setup_sorting(NULL); /* * expected output: @@ -491,7 +491,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) goto out; hists__collapse_resort(hists, NULL); - hists__output_resort(hists, NULL); + perf_evsel__output_resort(evsel, NULL); if (verbose > 2) { pr_info("[fields = %s, sort = %s]\n", field_order, sort_order); @@ -576,7 +576,7 @@ out: return err; } -int test__hists_output(void) +int test__hists_output(int subtest __maybe_unused) { int err = TEST_FAIL; struct machines machines; @@ -597,6 +597,7 @@ int test__hists_output(void) err = parse_events(evlist, "cpu-clock", NULL); if (err) goto out; + err = TEST_FAIL; machines__init(&machines); diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 4d4b9837b630..ddb78fae064a 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -49,13 +49,12 @@ static int find_comm(struct perf_evlist *evlist, const char *comm) * when an event is disabled but a dummy software event is not disabled. If the * test passes %0 is returned, otherwise %-1 is returned. */ -int test__keep_tracking(void) +int test__keep_tracking(int subtest __maybe_unused) { struct record_opts opts = { .mmap_pages = UINT_MAX, .user_freq = UINT_MAX, .user_interval = ULLONG_MAX, - .freq = 4000, .target = { .uses_mmap = true, }, @@ -90,8 +89,8 @@ int test__keep_tracking(void) evsel->attr.enable_on_exec = 0; if (perf_evlist__open(evlist) < 0) { - fprintf(stderr, " (not supported)"); - err = 0; + pr_debug("Unable to open dummy and cycles event\n"); + err = TEST_SKIP; goto out_err; } @@ -124,7 +123,7 @@ int test__keep_tracking(void) evsel = perf_evlist__last(evlist); - CHECK__(perf_evlist__disable_event(evlist, evsel)); + CHECK__(perf_evsel__disable(evsel)); comm = "Test COMM 2"; CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0)); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index 08c433b4bf4f..d2af78193153 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -49,7 +49,7 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect) #define M(path, c, e) \ TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) -int test__kmod_path__parse(void) +int test__kmod_path__parse(int subtest __maybe_unused) { /* path alloc_name alloc_ext kmod comp name ext */ T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c index 52d55971f66f..cff564fb4b66 100644 --- a/tools/perf/tests/llvm.c +++ b/tools/perf/tests/llvm.c @@ -2,97 +2,169 @@ #include <bpf/libbpf.h> #include <util/llvm-utils.h> #include <util/cache.h> +#include "llvm.h" #include "tests.h" #include "debug.h" -static int perf_config_cb(const char *var, const char *val, - void *arg __maybe_unused) -{ - return perf_default_config(var, val, arg); -} - -/* - * Randomly give it a "version" section since we don't really load it - * into kernel - */ -static const char test_bpf_prog[] = - "__attribute__((section(\"do_fork\"), used)) " - "int fork(void *ctx) {return 0;} " - "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";" - "int _version __attribute__((section(\"version\"), used)) = 0x40100;"; - #ifdef HAVE_LIBBPF_SUPPORT static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) { struct bpf_object *obj; obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); - if (!obj) - return -1; + if (IS_ERR(obj)) + return TEST_FAIL; bpf_object__close(obj); - return 0; + return TEST_OK; } #else static int test__bpf_parsing(void *obj_buf __maybe_unused, size_t obj_buf_sz __maybe_unused) { - fprintf(stderr, " (skip bpf parsing)"); - return 0; + pr_debug("Skip bpf parsing\n"); + return TEST_OK; } #endif -int test__llvm(void) +static struct { + const char *source; + const char *desc; + bool should_load_fail; +} bpf_source_table[__LLVM_TESTCASE_MAX] = { + [LLVM_TESTCASE_BASE] = { + .source = test_llvm__bpf_base_prog, + .desc = "Basic BPF llvm compiling test", + }, + [LLVM_TESTCASE_KBUILD] = { + .source = test_llvm__bpf_test_kbuild_prog, + .desc = "Test kbuild searching", + }, + [LLVM_TESTCASE_BPF_PROLOGUE] = { + .source = test_llvm__bpf_test_prologue_prog, + .desc = "Compile source for BPF prologue generation test", + }, + [LLVM_TESTCASE_BPF_RELOCATION] = { + .source = test_llvm__bpf_test_relocation, + .desc = "Compile source for BPF relocation test", + .should_load_fail = true, + }, +}; + +int +test_llvm__fetch_bpf_obj(void **p_obj_buf, + size_t *p_obj_buf_sz, + enum test_llvm__testcase idx, + bool force, + bool *should_load_fail) { - char *tmpl_new, *clang_opt_new; - void *obj_buf; - size_t obj_buf_sz; - int err, old_verbose; + const char *source; + const char *desc; + const char *tmpl_old, *clang_opt_old; + char *tmpl_new = NULL, *clang_opt_new = NULL; + int err, old_verbose, ret = TEST_FAIL; + + if (idx >= __LLVM_TESTCASE_MAX) + return TEST_FAIL; - perf_config(perf_config_cb, NULL); + source = bpf_source_table[idx].source; + desc = bpf_source_table[idx].desc; + if (should_load_fail) + *should_load_fail = bpf_source_table[idx].should_load_fail; /* * Skip this test if user's .perfconfig doesn't set [llvm] section * and clang is not found in $PATH, and this is not perf test -v */ - if (verbose == 0 && !llvm_param.user_set_param && llvm__search_clang()) { - fprintf(stderr, " (no clang, try 'perf test -v LLVM')"); + if (!force && (verbose == 0 && + !llvm_param.user_set_param && + llvm__search_clang())) { + pr_debug("No clang and no verbosive, skip this test\n"); return TEST_SKIP; } - old_verbose = verbose; /* * llvm is verbosity when error. Suppress all error output if * not 'perf test -v'. */ + old_verbose = verbose; if (verbose == 0) verbose = -1; + *p_obj_buf = NULL; + *p_obj_buf_sz = 0; + if (!llvm_param.clang_bpf_cmd_template) - return -1; + goto out; if (!llvm_param.clang_opt) llvm_param.clang_opt = strdup(""); - err = asprintf(&tmpl_new, "echo '%s' | %s", test_bpf_prog, - llvm_param.clang_bpf_cmd_template); + err = asprintf(&tmpl_new, "echo '%s' | %s%s", source, + llvm_param.clang_bpf_cmd_template, + old_verbose ? "" : " 2>/dev/null"); if (err < 0) - return -1; + goto out; err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt); if (err < 0) - return -1; + goto out; + tmpl_old = llvm_param.clang_bpf_cmd_template; llvm_param.clang_bpf_cmd_template = tmpl_new; + clang_opt_old = llvm_param.clang_opt; llvm_param.clang_opt = clang_opt_new; - err = llvm__compile_bpf("-", &obj_buf, &obj_buf_sz); + + err = llvm__compile_bpf("-", p_obj_buf, p_obj_buf_sz); + + llvm_param.clang_bpf_cmd_template = tmpl_old; + llvm_param.clang_opt = clang_opt_old; verbose = old_verbose; - if (err) { - if (!verbose) - fprintf(stderr, " (use -v to see error message)"); - return -1; - } + if (err) + goto out; + + ret = TEST_OK; +out: + free(tmpl_new); + free(clang_opt_new); + if (ret != TEST_OK) + pr_debug("Failed to compile test case: '%s'\n", desc); + return ret; +} - err = test__bpf_parsing(obj_buf, obj_buf_sz); +int test__llvm(int subtest) +{ + int ret; + void *obj_buf = NULL; + size_t obj_buf_sz = 0; + bool should_load_fail = false; + + if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) + return TEST_FAIL; + + ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz, + subtest, false, &should_load_fail); + + if (ret == TEST_OK && !should_load_fail) { + ret = test__bpf_parsing(obj_buf, obj_buf_sz); + if (ret != TEST_OK) { + pr_debug("Failed to parse test case '%s'\n", + bpf_source_table[subtest].desc); + } + } free(obj_buf); - return err; + + return ret; +} + +int test__llvm_subtest_get_nr(void) +{ + return __LLVM_TESTCASE_MAX; +} + +const char *test__llvm_subtest_get_desc(int subtest) +{ + if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX)) + return NULL; + + return bpf_source_table[subtest].desc; } diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h new file mode 100644 index 000000000000..0eaa604be99d --- /dev/null +++ b/tools/perf/tests/llvm.h @@ -0,0 +1,23 @@ +#ifndef PERF_TEST_LLVM_H +#define PERF_TEST_LLVM_H + +#include <stddef.h> /* for size_t */ +#include <stdbool.h> /* for bool */ + +extern const char test_llvm__bpf_base_prog[]; +extern const char test_llvm__bpf_test_kbuild_prog[]; +extern const char test_llvm__bpf_test_prologue_prog[]; +extern const char test_llvm__bpf_test_relocation[]; + +enum test_llvm__testcase { + LLVM_TESTCASE_BASE, + LLVM_TESTCASE_KBUILD, + LLVM_TESTCASE_BPF_PROLOGUE, + LLVM_TESTCASE_BPF_RELOCATION, + __LLVM_TESTCASE_MAX, +}; + +int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz, + enum test_llvm__testcase index, bool force, + bool *should_load_fail); +#endif diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 2cbd0c6901e3..cac15d93aea6 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -1,9 +1,11 @@ +include ../scripts/Makefile.include + ifndef MK ifeq ($(MAKECMDGOALS),) # no target specified, trigger the whole suite all: @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile - @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf + @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1 else # run only specific test over 'Makefile' %: @@ -11,8 +13,41 @@ else endif else PERF := . +PERF_O := $(PERF) +O_OPT := +FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O)) + +ifneq ($(O),) + FULL_O := $(shell readlink -f $(O) || echo $(O)) + PERF_O := $(FULL_O) + ifeq ($(SET_O),1) + O_OPT := 'O=$(FULL_O)' + endif + K_O_OPT := 'O=$(FULL_O)' +endif + +PARALLEL_OPT= +ifeq ($(SET_PARALLEL),1) + cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) + ifeq ($(cores),0) + cores := 1 + endif + PARALLEL_OPT="-j$(cores)" +endif + +# As per kernel Makefile, avoid funny character set dependencies +unexport LC_ALL +LC_COLLATE=C +LC_NUMERIC=C +export LC_COLLATE LC_NUMERIC -include config/Makefile.arch +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +include $(srctree)/tools/scripts/Makefile.arch # FIXME looks like x86 is the only arch running tests ;-) # we need some IS_(32/64) flag to make this generic @@ -45,6 +80,7 @@ make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 make_no_auxtrace := NO_AUXTRACE=1 make_no_libbpf := NO_LIBBPF=1 +make_no_libcrypto := NO_LIBCRYPTO=1 make_tags := tags make_cscope := cscope make_help := help @@ -68,6 +104,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 +make_minimal += NO_LIBCRYPTO=1 # $(run) contains all available tests run := make_pure @@ -76,6 +113,9 @@ run := make_pure # disable features detection ifeq ($(MK),Makefile) run += make_clean_all +MAKE_F := $(MAKE) +else +MAKE_F := $(MAKE) -f $(MK) endif run += make_python_perf_so run += make_debug @@ -142,11 +182,11 @@ test_make_doc := $(test_ok) test_make_help_O := $(test_ok) test_make_doc_O := $(test_ok) -test_make_python_perf_so := test -f $(PERF)/python/perf.so +test_make_python_perf_so := test -f $(PERF_O)/python/perf.so -test_make_perf_o := test -f $(PERF)/perf.o -test_make_util_map_o := test -f $(PERF)/util/map.o -test_make_util_pmu_bison_o := test -f $(PERF)/util/pmu-bison.o +test_make_perf_o := test -f $(PERF_O)/perf.o +test_make_util_map_o := test -f $(PERF_O)/util/map.o +test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o define test_dest_files for file in $(1); do \ @@ -213,7 +253,7 @@ test_make_perf_o_O := test -f $$TMP_O/perf.o test_make_util_map_o_O := test -f $$TMP_O/util/map.o test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o -test_default = test -x $(PERF)/perf +test_default = test -x $(PERF_O)/perf test = $(if $(test_$1),$(test_$1),$(test_default)) test_default_O = test -x $$TMP_O/perf @@ -221,6 +261,13 @@ test_O = $(if $(test_$1),$(test_$1),$(test_default_O)) all: +ifdef SHUF +run := $(shell shuf -e $(run)) +run_O := $(shell shuf -e $(run_O)) +endif + +max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L) + ifdef DEBUG d := $(info run $(run)) d := $(info run_O $(run_O)) @@ -228,13 +275,13 @@ endif MAKEFLAGS := --no-print-directory -clean := @(cd $(PERF); make -s -f $(MK) clean >/dev/null) +clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null) $(run): $(call clean) @TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) DESTDIR=$$TMP_DEST $($@)"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1; \ echo " test: $(call test,$@)" >> $@ 2>&1; \ $(call test,$@) && \ @@ -244,8 +291,8 @@ $(run_O): $(call clean) @TMP_O=$$(mktemp -d); \ TMP_DEST=$$(mktemp -d); \ - cmd="cd $(PERF) && make -f $(MK) O=$$TMP_O DESTDIR=$$TMP_DEST $($(patsubst %_O,%,$@))"; \ - echo "- $@: $$cmd" && echo $$cmd > $@ && \ + cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \ + printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 && \ echo " test: $(call test_O,$@)" >> $@ 2>&1; \ $(call test_O,$@) && \ @@ -254,25 +301,63 @@ $(run_O): tarpkg: @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \ echo "- $@: $$cmd" && echo $$cmd > $@ && \ - ( eval $$cmd ) >> $@ 2>&1 + ( eval $$cmd ) >> $@ 2>&1 && \ + rm -f $@ + +KERNEL_O := ../.. +ifneq ($(O),) + KERNEL_O := $(O) +endif make_kernelsrc: - @echo "- make -C <kernelsrc> tools/perf" + @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf" $(call clean); \ - (make -C ../.. tools/perf) > $@ 2>&1 && \ - test -x perf && rm -f $@ || (cat $@ ; false) + (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \ + test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) make_kernelsrc_tools: - @echo "- make -C <kernelsrc>/tools perf" + @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf" $(call clean); \ - (make -C ../../tools perf) > $@ 2>&1 && \ - test -x perf && rm -f $@ || (cat $@ ; false) + (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \ + test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false) + +FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP +FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) out: $(run_O) @echo OK + @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC) + +ifeq ($(REUSE_FEATURES_DUMP),1) +$(FEATURES_DUMP_FILE): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +$(FEATURES_DUMP_FILE_STATIC): + $(call clean) + @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \ + echo "- $@: $$cmd" && echo $$cmd && \ + ( eval $$cmd ) > /dev/null 2>&1 + +# Add feature dump dependency for run/run_O targets +$(foreach t,$(run) $(run_O),$(eval \ + $(t): $(if $(findstring make_static,$(t)),\ + $(FEATURES_DUMP_FILE_STATIC),\ + $(FEATURES_DUMP_FILE)))) + +# Append 'FEATURES_DUMP=' option to all test cases. For example: +# make_no_libbpf: NO_LIBBPF=1 --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP +# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC +$(foreach t,$(run),$(if $(findstring make_static,$(t)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\ + $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE)))) +endif -.PHONY: all $(run) $(run_O) tarpkg clean +.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools endif # ifndef MK diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 4495493c9431..359e98fcd94c 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -16,7 +16,7 @@ * Then it checks if the number of syscalls reported as perf events by * the kernel corresponds to the number of syscalls made. */ -int test__basic_mmap(void) +int test__basic_mmap(int subtest __maybe_unused) { int err = -1; union perf_event *event; diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 145050e2e544..0c5ce44f723f 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -149,7 +149,6 @@ static int synth_process(struct machine *machine) static int mmap_events(synth_cb synth) { - struct machines machines; struct machine *machine; int err, i; @@ -162,8 +161,7 @@ static int mmap_events(synth_cb synth) */ TEST_ASSERT_VAL("failed to create threads", !threads_create()); - machines__init(&machines); - machine = &machines.host; + machine = machine__new_host(); dump_trace = verbose > 1 ? 1 : 0; @@ -203,7 +201,7 @@ static int mmap_events(synth_cb synth) } machine__delete_threads(machine); - machines__exit(&machines); + machine__delete(machine); return err; } @@ -221,7 +219,7 @@ static int mmap_events(synth_cb synth) * * by using all thread objects. */ -int test__mmap_thread_lookup(void) +int test__mmap_thread_lookup(int subtest __maybe_unused) { /* perf_event__synthesize_threads synthesize */ TEST_ASSERT_VAL("failed with sythesizing all", diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 2006485a2859..53c2273e8859 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -7,7 +7,7 @@ #include "debug.h" #include "stat.h" -int test__openat_syscall_event_on_all_cpus(void) +int test__openat_syscall_event_on_all_cpus(int subtest __maybe_unused) { int err = -1, fd, cpu; struct cpu_map *cpus; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 5e811cd8f1c3..eb99a105f31c 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -6,7 +6,7 @@ #include "tests.h" #include "debug.h" -int test__syscall_openat_tp_fields(void) +int test__syscall_openat_tp_fields(int subtest __maybe_unused) { struct record_opts opts = { .target = { diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 033b54797b8a..1184f9ba6499 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -5,7 +5,7 @@ #include "debug.h" #include "tests.h" -int test__openat_syscall_event(void) +int test__openat_syscall_event(int subtest __maybe_unused) { int err = -1, fd; struct perf_evsel *evsel; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 636d7b42d844..7865f68dc0d8 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1271,6 +1271,38 @@ static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist) return 0; } +static int test__checkevent_config_symbol(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0); + return 0; +} + +static int test__checkevent_config_raw(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0); + return 0; +} + +static int test__checkevent_config_num(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0); + return 0; +} + +static int test__checkevent_config_cache(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; @@ -1579,6 +1611,26 @@ static struct evlist_test test__events[] = { .check = test__checkevent_precise_max_modifier, .id = 47, }, + { + .name = "instructions/name=insn/", + .check = test__checkevent_config_symbol, + .id = 48, + }, + { + .name = "r1234/name=rawpmu/", + .check = test__checkevent_config_raw, + .id = 49, + }, + { + .name = "4:0x6530160/name=numpmu/", + .check = test__checkevent_config_num, + .id = 50, + }, + { + .name = "L1-dcache-misses/name=cachepmu/", + .check = test__checkevent_config_cache, + .id = 51, + }, }; static struct evlist_test test__events_pmu[] = { @@ -1666,7 +1718,7 @@ static int test_term(struct terms_test *t) } ret = t->check(&terms); - parse_events__free_terms(&terms); + parse_events_terms__purge(&terms); return ret; } @@ -1765,7 +1817,7 @@ static void debug_warn(const char *warn, va_list params) fprintf(stderr, " Warning: %s\n", msg); } -int test__parse_events(void) +int test__parse_events(int subtest __maybe_unused) { int ret1, ret2 = 0; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index 2c63ea658541..294c76b01b41 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -67,7 +67,7 @@ struct test_attr_event { * * Return: %0 on success, %-1 if the test fails. */ -int test__parse_no_sample_id_all(void) +int test__parse_no_sample_id_all(int subtest __maybe_unused) { int err; diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 7a228a2a070b..1cc78cefe399 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -32,7 +32,7 @@ realloc: return cpu; } -int test__PERF_RECORD(void) +int test__PERF_RECORD(int subtest __maybe_unused) { struct record_opts opts = { .target = { @@ -40,12 +40,11 @@ int test__PERF_RECORD(void) .uses_mmap = true, }, .no_buffering = true, - .freq = 10, .mmap_pages = 256, }; cpu_set_t cpu_mask; size_t cpu_mask_size = sizeof(cpu_mask); - struct perf_evlist *evlist = perf_evlist__new_default(); + struct perf_evlist *evlist = perf_evlist__new_dummy(); struct perf_evsel *evsel; struct perf_sample sample; const char *cmd = "sleep"; @@ -61,6 +60,9 @@ int test__PERF_RECORD(void) int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, }; char sbuf[STRERR_BUFSIZE]; + if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */ + evlist = perf_evlist__new_default(); + if (evlist == NULL || argv == NULL) { pr_debug("Not enough memory to create evlist\n"); goto out; diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index faa04e9d5d5f..1e2ba2602930 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -133,7 +133,7 @@ static struct list_head *test_terms_list(void) return &terms; } -int test__pmu(void) +int test__pmu(int subtest __maybe_unused) { char *format = test_format_dir_get(); LIST_HEAD(formats); diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 7760277c6def..7a52834ee0d0 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -4,11 +4,12 @@ #include <stdio.h> #include <stdlib.h> +#include <linux/compiler.h> #include "tests.h" extern int verbose; -int test__python_use(void) +int test__python_use(int subtest __maybe_unused) { char *cmd; int ret; diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 30c02181e78b..5f23710b9fee 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -290,7 +290,7 @@ out_free: * checks sample format bits separately and together. If the test passes %0 is * returned, otherwise %-1 is returned. */ -int test__sample_parsing(void) +int test__sample_parsing(int subtest __maybe_unused) { const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15}; u64 sample_type; diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c new file mode 100644 index 000000000000..6a20ff2326bb --- /dev/null +++ b/tools/perf/tests/stat.c @@ -0,0 +1,111 @@ +#include <linux/compiler.h> +#include "event.h" +#include "tests.h" +#include "stat.h" +#include "counts.h" +#include "debug.h" + +static bool has_term(struct stat_config_event *config, + u64 tag, u64 val) +{ + unsigned i; + + for (i = 0; i < config->nr; i++) { + if ((config->data[i].tag == tag) && + (config->data[i].val == val)) + return true; + } + + return false; +} + +static int process_stat_config_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct stat_config_event *config = &event->stat_config; + struct perf_stat_config stat_config; + +#define HAS(term, val) \ + has_term(config, PERF_STAT_CONFIG_TERM__##term, val) + + TEST_ASSERT_VAL("wrong nr", config->nr == PERF_STAT_CONFIG_TERM__MAX); + TEST_ASSERT_VAL("wrong aggr_mode", HAS(AGGR_MODE, AGGR_CORE)); + TEST_ASSERT_VAL("wrong scale", HAS(SCALE, 1)); + TEST_ASSERT_VAL("wrong interval", HAS(INTERVAL, 1)); + +#undef HAS + + perf_event__read_stat_config(&stat_config, config); + + TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE); + TEST_ASSERT_VAL("wrong scale", stat_config.scale == 1); + TEST_ASSERT_VAL("wrong interval", stat_config.interval == 1); + return 0; +} + +int test__synthesize_stat_config(int subtest __maybe_unused) +{ + struct perf_stat_config stat_config = { + .aggr_mode = AGGR_CORE, + .scale = 1, + .interval = 1, + }; + + TEST_ASSERT_VAL("failed to synthesize stat_config", + !perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL)); + + return 0; +} + +static int process_stat_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct stat_event *st = &event->stat; + + TEST_ASSERT_VAL("wrong cpu", st->cpu == 1); + TEST_ASSERT_VAL("wrong thread", st->thread == 2); + TEST_ASSERT_VAL("wrong id", st->id == 3); + TEST_ASSERT_VAL("wrong val", st->val == 100); + TEST_ASSERT_VAL("wrong run", st->ena == 200); + TEST_ASSERT_VAL("wrong ena", st->run == 300); + return 0; +} + +int test__synthesize_stat(int subtest __maybe_unused) +{ + struct perf_counts_values count; + + count.val = 100; + count.ena = 200; + count.run = 300; + + TEST_ASSERT_VAL("failed to synthesize stat_config", + !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL)); + + return 0; +} + +static int process_stat_round_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct stat_round_event *stat_round = &event->stat_round; + + TEST_ASSERT_VAL("wrong time", stat_round->time == 0xdeadbeef); + TEST_ASSERT_VAL("wrong type", stat_round->type == PERF_STAT_ROUND_TYPE__INTERVAL); + return 0; +} + +int test__synthesize_stat_round(int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("failed to synthesize stat_config", + !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL, + process_stat_round_event, NULL)); + + return 0; +} diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 5b83f56a3b6f..36e8ce1550e3 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -122,7 +122,7 @@ out_delete_evlist: return err; } -int test__sw_clock_freq(void) +int test__sw_clock_freq(int subtest __maybe_unused) { int ret; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index e698742d4fec..ebd80168d51e 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -305,7 +305,7 @@ out_free_nodes: * evsel->system_wide and evsel->tracking flags (respectively) with other events * sometimes enabled or disabled. */ -int test__switch_tracking(void) +int test__switch_tracking(int subtest __maybe_unused) { const char *sched_switch = "sched:sched_switch"; struct switch_tracking switch_tracking = { .tids = NULL, }; @@ -366,7 +366,7 @@ int test__switch_tracking(void) /* Third event */ if (!perf_evlist__can_select_event(evlist, sched_switch)) { - fprintf(stderr, " (no sched_switch)"); + pr_debug("No sched_switch\n"); err = 0; goto out; } @@ -442,7 +442,7 @@ int test__switch_tracking(void) } if (perf_evlist__open(evlist) < 0) { - fprintf(stderr, " (not supported)"); + pr_debug("Not supported\n"); err = 0; goto out; } @@ -455,7 +455,7 @@ int test__switch_tracking(void) perf_evlist__enable(evlist); - err = perf_evlist__disable_event(evlist, cpu_clocks_evsel); + err = perf_evsel__disable(cpu_clocks_evsel); if (err) { pr_debug("perf_evlist__disable_event failed!\n"); goto out_err; @@ -474,7 +474,7 @@ int test__switch_tracking(void) goto out_err; } - err = perf_evlist__disable_event(evlist, cycles_evsel); + err = perf_evsel__disable(cycles_evsel); if (err) { pr_debug("perf_evlist__disable_event failed!\n"); goto out_err; @@ -500,7 +500,7 @@ int test__switch_tracking(void) goto out_err; } - err = perf_evlist__enable_event(evlist, cycles_evsel); + err = perf_evsel__enable(cycles_evsel); if (err) { pr_debug("perf_evlist__disable_event failed!\n"); goto out_err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index add16385f13e..2dfff7ac8ef3 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -31,7 +31,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, * if the number of exit event reported by the kernel is 1 or not * in order to check the kernel returns correct number of event. */ -int test__task_exit(void) +int test__task_exit(int subtest __maybe_unused) { int err = -1; union perf_event *event; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index c80486969f83..82b2b5e6ba7c 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -1,6 +1,8 @@ #ifndef TESTS_H #define TESTS_H +#include <stdbool.h> + #define TEST_ASSERT_VAL(text, cond) \ do { \ if (!(cond)) { \ @@ -26,47 +28,63 @@ enum { struct test { const char *desc; - int (*func)(void); + int (*func)(int subtest); + struct { + bool skip_if_fail; + int (*get_nr)(void); + const char *(*get_desc)(int subtest); + } subtest; }; /* Tests */ -int test__vmlinux_matches_kallsyms(void); -int test__openat_syscall_event(void); -int test__openat_syscall_event_on_all_cpus(void); -int test__basic_mmap(void); -int test__PERF_RECORD(void); -int test__perf_evsel__roundtrip_name_test(void); -int test__perf_evsel__tp_sched_test(void); -int test__syscall_openat_tp_fields(void); -int test__pmu(void); -int test__attr(void); -int test__dso_data(void); -int test__dso_data_cache(void); -int test__dso_data_reopen(void); -int test__parse_events(void); -int test__hists_link(void); -int test__python_use(void); -int test__bp_signal(void); -int test__bp_signal_overflow(void); -int test__task_exit(void); -int test__sw_clock_freq(void); -int test__code_reading(void); -int test__sample_parsing(void); -int test__keep_tracking(void); -int test__parse_no_sample_id_all(void); -int test__dwarf_unwind(void); -int test__hists_filter(void); -int test__mmap_thread_lookup(void); -int test__thread_mg_share(void); -int test__hists_output(void); -int test__hists_cumulate(void); -int test__switch_tracking(void); -int test__fdarray__filter(void); -int test__fdarray__add(void); -int test__kmod_path__parse(void); -int test__thread_map(void); -int test__llvm(void); -int test_session_topology(void); +int test__vmlinux_matches_kallsyms(int subtest); +int test__openat_syscall_event(int subtest); +int test__openat_syscall_event_on_all_cpus(int subtest); +int test__basic_mmap(int subtest); +int test__PERF_RECORD(int subtest); +int test__perf_evsel__roundtrip_name_test(int subtest); +int test__perf_evsel__tp_sched_test(int subtest); +int test__syscall_openat_tp_fields(int subtest); +int test__pmu(int subtest); +int test__attr(int subtest); +int test__dso_data(int subtest); +int test__dso_data_cache(int subtest); +int test__dso_data_reopen(int subtest); +int test__parse_events(int subtest); +int test__hists_link(int subtest); +int test__python_use(int subtest); +int test__bp_signal(int subtest); +int test__bp_signal_overflow(int subtest); +int test__task_exit(int subtest); +int test__sw_clock_freq(int subtest); +int test__code_reading(int subtest); +int test__sample_parsing(int subtest); +int test__keep_tracking(int subtest); +int test__parse_no_sample_id_all(int subtest); +int test__dwarf_unwind(int subtest); +int test__hists_filter(int subtest); +int test__mmap_thread_lookup(int subtest); +int test__thread_mg_share(int subtest); +int test__hists_output(int subtest); +int test__hists_cumulate(int subtest); +int test__switch_tracking(int subtest); +int test__fdarray__filter(int subtest); +int test__fdarray__add(int subtest); +int test__kmod_path__parse(int subtest); +int test__thread_map(int subtest); +int test__llvm(int subtest); +const char *test__llvm_subtest_get_desc(int subtest); +int test__llvm_subtest_get_nr(void); +int test__bpf(int subtest); +const char *test__bpf_subtest_get_desc(int subtest); +int test__bpf_subtest_get_nr(void); +int test_session_topology(int subtest); +int test__thread_map_synthesize(int subtest); +int test__cpu_map_synthesize(int subtest); +int test__synthesize_stat_config(int subtest); +int test__synthesize_stat(int subtest); +int test__synthesize_stat_round(int subtest); +int test__event_update(int subtest); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 138a0e3431fa..fccde848fe9c 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -4,7 +4,7 @@ #include "thread_map.h" #include "debug.h" -int test__thread_map(void) +int test__thread_map(int subtest __maybe_unused) { struct thread_map *map; @@ -40,3 +40,46 @@ int test__thread_map(void) thread_map__put(map); return 0; } + +static int process_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct thread_map_event *map = &event->thread_map; + struct thread_map *threads; + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", map->entries[0].pid == (u64) getpid()); + TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, "perf")); + + threads = thread_map__new_event(&event->thread_map); + TEST_ASSERT_VAL("failed to alloc map", threads); + + TEST_ASSERT_VAL("wrong nr", threads->nr == 1); + TEST_ASSERT_VAL("wrong pid", + thread_map__pid(threads, 0) == getpid()); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(threads, 0) && + !strcmp(thread_map__comm(threads, 0), "perf")); + TEST_ASSERT_VAL("wrong refcnt", + atomic_read(&threads->refcnt) == 1); + thread_map__put(threads); + return 0; +} + +int test__thread_map_synthesize(int subtest __maybe_unused) +{ + struct thread_map *threads; + + /* test map on current pid */ + threads = thread_map__new_by_pid(getpid()); + TEST_ASSERT_VAL("failed to alloc map", threads); + + thread_map__read_comms(threads); + + TEST_ASSERT_VAL("failed to synthesize map", + !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL)); + + return 0; +} diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c index 01fabb19d746..188b63140fc8 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-mg-share.c @@ -4,7 +4,7 @@ #include "map.h" #include "debug.h" -int test__thread_mg_share(void) +int test__thread_mg_share(int subtest __maybe_unused) { struct machines machines; struct machine *machine; diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index f5bb096c3bd9..98fe69ac553c 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -84,7 +84,7 @@ static int check_cpu_topology(char *path, struct cpu_map *map) return 0; } -int test_session_topology(void) +int test_session_topology(int subtest __maybe_unused) { char path[PATH_MAX]; struct cpu_map *map; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index d677e018e504..630b0b409b97 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -18,7 +18,7 @@ static int vmlinux_matches_kallsyms_filter(struct map *map __maybe_unused, #define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x)) -int test__vmlinux_matches_kallsyms(void) +int test__vmlinux_matches_kallsyms(int subtest __maybe_unused) { int err = -1; struct rb_node *nd; @@ -110,7 +110,6 @@ int test__vmlinux_matches_kallsyms(void) */ for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { struct symbol *pair, *first_pair; - bool backwards = true; sym = rb_entry(nd, struct symbol, rb_node); @@ -151,27 +150,14 @@ next_pair: continue; } else { - struct rb_node *nnd; -detour: - nnd = backwards ? rb_prev(&pair->rb_node) : - rb_next(&pair->rb_node); - if (nnd) { - struct symbol *next = rb_entry(nnd, struct symbol, rb_node); - - if (UM(next->start) == mem_start) { - pair = next; + pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL, NULL); + if (pair) { + if (UM(pair->start) == mem_start) goto next_pair; - } - } - if (backwards) { - backwards = false; - pair = first_pair; - goto detour; + pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", + mem_start, sym->name, pair->name); } - - pr_debug("%#" PRIx64 ": diff name v: %s k: %s\n", - mem_start, sym->name, pair->name); } } else pr_debug("%#" PRIx64 ": %s not on kallsyms\n", diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index e9703c0829f1..af68a9d488bf 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -528,11 +528,11 @@ static struct ui_browser_colorset { .colorset = HE_COLORSET_SELECTED, .name = "selected", .fg = "black", - .bg = "lightgray", + .bg = "yellow", }, { - .colorset = HE_COLORSET_CODE, - .name = "code", + .colorset = HE_COLORSET_JUMP_ARROWS, + .name = "jump_arrows", .fg = "blue", .bg = "default", }, diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 01781de59532..be3b70eb5fca 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -7,7 +7,7 @@ #define HE_COLORSET_MEDIUM 51 #define HE_COLORSET_NORMAL 52 #define HE_COLORSET_SELECTED 53 -#define HE_COLORSET_CODE 54 +#define HE_COLORSET_JUMP_ARROWS 54 #define HE_COLORSET_ADDR 55 #define HE_COLORSET_ROOT 56 diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d4d7cc27252f..4fc208e82c6f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -284,7 +284,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) to = (u64)btarget->idx; } - ui_browser__set_color(browser, HE_COLORSET_CODE); + ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, from, to); } @@ -755,11 +755,11 @@ static int annotate_browser__run(struct annotate_browser *browser, nd = browser->curr_hot; break; case K_UNTAB: - if (nd != NULL) + if (nd != NULL) { nd = rb_next(nd); if (nd == NULL) nd = rb_first(&browser->entries); - else + } else nd = browser->curr_hot; break; case K_F1: diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e5afb8936040..4b9816555946 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -32,6 +32,7 @@ struct hist_browser { bool show_headers; float min_pcnt; u64 nr_non_filtered_entries; + u64 nr_hierarchy_entries; u64 nr_callchain_rows; }; @@ -58,11 +59,11 @@ static int hist_browser__get_folding(struct hist_browser *browser) for (nd = rb_first(&hists->entries); (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; - nd = rb_next(nd)) { + nd = rb_hierarchy_next(nd)) { struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - if (he->unfolded) + if (he->leaf && he->unfolded) unfolded_rows += he->nr_rows; } return unfolded_rows; @@ -72,7 +73,9 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) { u32 nr_entries; - if (hist_browser__has_filter(hb)) + if (symbol_conf.report_hierarchy) + nr_entries = hb->nr_hierarchy_entries; + else if (hist_browser__has_filter(hb)) nr_entries = hb->nr_non_filtered_entries; else nr_entries = hb->hists->nr_entries; @@ -178,12 +181,51 @@ static int callchain_node__count_rows_rb_tree(struct callchain_node *node) return n; } +static int callchain_node__count_flat_rows(struct callchain_node *node) +{ + struct callchain_list *chain; + char folded_sign = 0; + int n = 0; + + list_for_each_entry(chain, &node->parent_val, list) { + if (!folded_sign) { + /* only check first chain list entry */ + folded_sign = callchain_list__folded(chain); + if (folded_sign == '+') + return 1; + } + n++; + } + + list_for_each_entry(chain, &node->val, list) { + if (!folded_sign) { + /* node->parent_val list might be empty */ + folded_sign = callchain_list__folded(chain); + if (folded_sign == '+') + return 1; + } + n++; + } + + return n; +} + +static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused) +{ + return 1; +} + static int callchain_node__count_rows(struct callchain_node *node) { struct callchain_list *chain; bool unfolded = false; int n = 0; + if (callchain_param.mode == CHAIN_FLAT) + return callchain_node__count_flat_rows(node); + else if (callchain_param.mode == CHAIN_FOLDED) + return callchain_node__count_folded_rows(node); + list_for_each_entry(chain, &node->val, list) { ++n; unfolded = chain->unfolded; @@ -208,6 +250,38 @@ static int callchain__count_rows(struct rb_root *chain) return n; } +static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, + bool include_children) +{ + int count = 0; + struct rb_node *node; + struct hist_entry *child; + + if (he->leaf) + return callchain__count_rows(&he->sorted_chain); + + if (he->has_no_entry) + return 1; + + node = rb_first(&he->hroot_out); + while (node) { + float percent; + + child = rb_entry(node, struct hist_entry, rb_node); + percent = hist_entry__get_percent_limit(child); + + if (!child->filtered && percent >= hb->min_pcnt) { + count++; + + if (include_children && child->unfolded) + count += hierarchy_count_rows(hb, child, true); + } + + node = rb_next(node); + } + return count; +} + static bool hist_entry__toggle_fold(struct hist_entry *he) { if (!he) @@ -263,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node, chain = list_entry(node->val.next, struct callchain_list, list); chain->has_children = has_sibling; - if (!list_empty(&node->val)) { + if (node->val.next != node->val.prev) { chain = list_entry(node->val.prev, struct callchain_list, list); chain->has_children = !RB_EMPTY_ROOT(&node->rb_root); } @@ -279,16 +353,25 @@ static void callchain__init_have_children(struct rb_root *root) for (nd = rb_first(root); nd; nd = rb_next(nd)) { struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); callchain_node__init_have_children(node, has_sibling); + if (callchain_param.mode == CHAIN_FLAT || + callchain_param.mode == CHAIN_FOLDED) + callchain_node__make_parent_list(node); } } static void hist_entry__init_have_children(struct hist_entry *he) { - if (!he->init_have_children) { + if (he->init_have_children) + return; + + if (he->leaf) { he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); callchain__init_have_children(&he->sorted_chain); - he->init_have_children = true; + } else { + he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); } + + he->init_have_children = true; } static bool hist_browser__toggle_fold(struct hist_browser *browser) @@ -298,23 +381,58 @@ static bool hist_browser__toggle_fold(struct hist_browser *browser) struct callchain_list *cl = container_of(ms, struct callchain_list, ms); bool has_children; + if (!he || !ms) + return false; + if (ms == &he->ms) has_children = hist_entry__toggle_fold(he); else has_children = callchain_list__toggle_fold(cl); if (has_children) { + int child_rows = 0; + hist_entry__init_have_children(he); browser->b.nr_entries -= he->nr_rows; - browser->nr_callchain_rows -= he->nr_rows; - if (he->unfolded) - he->nr_rows = callchain__count_rows(&he->sorted_chain); + if (he->leaf) + browser->nr_callchain_rows -= he->nr_rows; else + browser->nr_hierarchy_entries -= he->nr_rows; + + if (symbol_conf.report_hierarchy) + child_rows = hierarchy_count_rows(browser, he, true); + + if (he->unfolded) { + if (he->leaf) + he->nr_rows = callchain__count_rows(&he->sorted_chain); + else + he->nr_rows = hierarchy_count_rows(browser, he, false); + + /* account grand children */ + if (symbol_conf.report_hierarchy) + browser->b.nr_entries += child_rows - he->nr_rows; + + if (!he->leaf && he->nr_rows == 0) { + he->has_no_entry = true; + he->nr_rows = 1; + } + } else { + if (symbol_conf.report_hierarchy) + browser->b.nr_entries -= child_rows - he->nr_rows; + + if (he->has_no_entry) + he->has_no_entry = false; + he->nr_rows = 0; + } browser->b.nr_entries += he->nr_rows; - browser->nr_callchain_rows += he->nr_rows; + + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else + browser->nr_hierarchy_entries += he->nr_rows; return true; } @@ -377,13 +495,38 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold) return n; } -static void hist_entry__set_folding(struct hist_entry *he, bool unfold) +static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, + bool unfold __maybe_unused) +{ + float percent; + struct rb_node *nd; + struct hist_entry *child; + int n = 0; + + for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { + child = rb_entry(nd, struct hist_entry, rb_node); + percent = hist_entry__get_percent_limit(child); + if (!child->filtered && percent >= hb->min_pcnt) + n++; + } + + return n; +} + +static void hist_entry__set_folding(struct hist_entry *he, + struct hist_browser *hb, bool unfold) { hist_entry__init_have_children(he); he->unfolded = unfold ? he->has_children : false; if (he->has_children) { - int n = callchain__set_folding(&he->sorted_chain, unfold); + int n; + + if (he->leaf) + n = callchain__set_folding(&he->sorted_chain, unfold); + else + n = hierarchy_set_folding(hb, he, unfold); + he->nr_rows = unfold ? n : 0; } else he->nr_rows = 0; @@ -393,19 +536,38 @@ static void __hist_browser__set_folding(struct hist_browser *browser, bool unfold) { struct rb_node *nd; - struct hists *hists = browser->hists; + struct hist_entry *he; + double percent; - for (nd = rb_first(&hists->entries); - (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; - nd = rb_next(nd)) { - struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); - hist_entry__set_folding(he, unfold); - browser->nr_callchain_rows += he->nr_rows; + nd = rb_first(&browser->hists->entries); + while (nd) { + he = rb_entry(nd, struct hist_entry, rb_node); + + /* set folding state even if it's currently folded */ + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); + + hist_entry__set_folding(he, browser, unfold); + + percent = hist_entry__get_percent_limit(he); + if (he->filtered || percent < browser->min_pcnt) + continue; + + if (!he->depth || unfold) + browser->nr_hierarchy_entries++; + if (he->leaf) + browser->nr_callchain_rows += he->nr_rows; + else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) { + browser->nr_hierarchy_entries++; + he->has_no_entry = true; + he->nr_rows = 1; + } else + he->has_no_entry = false; } } static void hist_browser__set_folding(struct hist_browser *browser, bool unfold) { + browser->nr_hierarchy_entries = 0; browser->nr_callchain_rows = 0; __hist_browser__set_folding(browser, unfold); @@ -435,7 +597,7 @@ static int hist_browser__run(struct hist_browser *browser, const char *help) hists__browser_title(browser->hists, hbt, title, sizeof(title)); - if (ui_browser__show(&browser->b, title, help) < 0) + if (ui_browser__show(&browser->b, title, "%s", help) < 0) return -1; while (1) { @@ -574,33 +736,105 @@ static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_u #define LEVEL_OFFSET_STEP 3 -static int hist_browser__show_callchain(struct hist_browser *browser, - struct rb_root *root, int level, - unsigned short row, u64 total, - print_callchain_entry_fn print, - struct callchain_print_arg *arg, - check_output_full_fn is_output_full) +static int hist_browser__show_callchain_list(struct hist_browser *browser, + struct callchain_node *node, + struct callchain_list *chain, + unsigned short row, u64 total, + bool need_percent, int offset, + print_callchain_entry_fn print, + struct callchain_print_arg *arg) +{ + char bf[1024], *alloc_str; + const char *str; + + if (arg->row_offset != 0) { + arg->row_offset--; + return 0; + } + + alloc_str = NULL; + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); + + if (need_percent) { + char buf[64]; + + callchain_node__scnprintf_value(node, buf, sizeof(buf), + total); + + if (asprintf(&alloc_str, "%s %s", buf, str) < 0) + str = "Not enough memory!"; + else + str = alloc_str; + } + + print(browser, chain, str, offset, row, arg); + + free(alloc_str); + return 1; +} + +static bool check_percent_display(struct rb_node *node, u64 parent_total) +{ + struct callchain_node *child; + + if (node == NULL) + return false; + + if (rb_next(node)) + return true; + + child = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(child) != parent_total; +} + +static int hist_browser__show_callchain_flat(struct hist_browser *browser, + struct rb_root *root, + unsigned short row, u64 total, + u64 parent_total, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) { struct rb_node *node; - int first_row = row, offset = level * LEVEL_OFFSET_STEP; - u64 new_total; + int first_row = row, offset = LEVEL_OFFSET_STEP; bool need_percent; node = rb_first(root); - need_percent = node && rb_next(node); + need_percent = check_percent_display(node, parent_total); while (node) { struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); struct rb_node *next = rb_next(node); - u64 cumul = callchain_cumul_hits(child); struct callchain_list *chain; char folded_sign = ' '; int first = true; int extra_offset = 0; + list_for_each_entry(chain, &child->parent_val, list) { + bool was_first = first; + + if (first) + first = false; + else if (need_percent) + extra_offset = LEVEL_OFFSET_STEP; + + folded_sign = callchain_list__folded(chain); + + row += hist_browser__show_callchain_list(browser, child, + chain, row, total, + was_first && need_percent, + offset + extra_offset, + print, arg); + + if (is_output_full(browser, row)) + goto out; + + if (folded_sign == '+') + goto next; + } + list_for_each_entry(chain, &child->val, list) { - char bf[1024], *alloc_str; - const char *str; bool was_first = first; if (first) @@ -609,31 +843,186 @@ static int hist_browser__show_callchain(struct hist_browser *browser, extra_offset = LEVEL_OFFSET_STEP; folded_sign = callchain_list__folded(chain); - if (arg->row_offset != 0) { - arg->row_offset--; - goto do_next; + + row += hist_browser__show_callchain_list(browser, child, + chain, row, total, + was_first && need_percent, + offset + extra_offset, + print, arg); + + if (is_output_full(browser, row)) + goto out; + + if (folded_sign == '+') + break; + } + +next: + if (is_output_full(browser, row)) + break; + node = next; + } +out: + return row - first_row; +} + +static char *hist_browser__folded_callchain_str(struct hist_browser *browser, + struct callchain_list *chain, + char *value_str, char *old_str) +{ + char bf[1024]; + const char *str; + char *new; + + str = callchain_list__sym_name(chain, bf, sizeof(bf), + browser->show_dso); + if (old_str) { + if (asprintf(&new, "%s%s%s", old_str, + symbol_conf.field_sep ?: ";", str) < 0) + new = NULL; + } else { + if (value_str) { + if (asprintf(&new, "%s %s", value_str, str) < 0) + new = NULL; + } else { + if (asprintf(&new, "%s", str) < 0) + new = NULL; + } + } + return new; +} + +static int hist_browser__show_callchain_folded(struct hist_browser *browser, + struct rb_root *root, + unsigned short row, u64 total, + u64 parent_total, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) +{ + struct rb_node *node; + int first_row = row, offset = LEVEL_OFFSET_STEP; + bool need_percent; + + node = rb_first(root); + need_percent = check_percent_display(node, parent_total); + + while (node) { + struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); + struct rb_node *next = rb_next(node); + struct callchain_list *chain, *first_chain = NULL; + int first = true; + char *value_str = NULL, *value_str_alloc = NULL; + char *chain_str = NULL, *chain_str_alloc = NULL; + + if (arg->row_offset != 0) { + arg->row_offset--; + goto next; + } + + if (need_percent) { + char buf[64]; + + callchain_node__scnprintf_value(child, buf, sizeof(buf), total); + if (asprintf(&value_str, "%s", buf) < 0) { + value_str = (char *)"<...>"; + goto do_print; } + value_str_alloc = value_str; + } - alloc_str = NULL; - str = callchain_list__sym_name(chain, bf, sizeof(bf), - browser->show_dso); + list_for_each_entry(chain, &child->parent_val, list) { + chain_str = hist_browser__folded_callchain_str(browser, + chain, value_str, chain_str); + if (first) { + first = false; + first_chain = chain; + } - if (was_first && need_percent) { - double percent = cumul * 100.0 / total; + if (chain_str == NULL) { + chain_str = (char *)"Not enough memory!"; + goto do_print; + } - if (asprintf(&alloc_str, "%2.2f%% %s", percent, str) < 0) - str = "Not enough memory!"; - else - str = alloc_str; + chain_str_alloc = chain_str; + } + + list_for_each_entry(chain, &child->val, list) { + chain_str = hist_browser__folded_callchain_str(browser, + chain, value_str, chain_str); + if (first) { + first = false; + first_chain = chain; } - print(browser, chain, str, offset + extra_offset, row, arg); + if (chain_str == NULL) { + chain_str = (char *)"Not enough memory!"; + goto do_print; + } + + chain_str_alloc = chain_str; + } + +do_print: + print(browser, first_chain, chain_str, offset, row++, arg); + free(value_str_alloc); + free(chain_str_alloc); + +next: + if (is_output_full(browser, row)) + break; + node = next; + } + + return row - first_row; +} + +static int hist_browser__show_callchain_graph(struct hist_browser *browser, + struct rb_root *root, int level, + unsigned short row, u64 total, + u64 parent_total, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) +{ + struct rb_node *node; + int first_row = row, offset = level * LEVEL_OFFSET_STEP; + bool need_percent; + u64 percent_total = total; + + if (callchain_param.mode == CHAIN_GRAPH_REL) + percent_total = parent_total; - free(alloc_str); + node = rb_first(root); + need_percent = check_percent_display(node, parent_total); - if (is_output_full(browser, ++row)) + while (node) { + struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node); + struct rb_node *next = rb_next(node); + struct callchain_list *chain; + char folded_sign = ' '; + int first = true; + int extra_offset = 0; + + list_for_each_entry(chain, &child->val, list) { + bool was_first = first; + + if (first) + first = false; + else if (need_percent) + extra_offset = LEVEL_OFFSET_STEP; + + folded_sign = callchain_list__folded(chain); + + row += hist_browser__show_callchain_list(browser, child, + chain, row, percent_total, + was_first && need_percent, + offset + extra_offset, + print, arg); + + if (is_output_full(browser, row)) goto out; -do_next: + if (folded_sign == '+') break; } @@ -641,13 +1030,9 @@ do_next: if (folded_sign == '-') { const int new_level = level + (extra_offset ? 2 : 1); - if (callchain_param.mode == CHAIN_GRAPH_REL) - new_total = child->children_hit; - else - new_total = total; - - row += hist_browser__show_callchain(browser, &child->rb_root, - new_level, row, new_total, + row += hist_browser__show_callchain_graph(browser, &child->rb_root, + new_level, row, total, + child->children_hit, print, arg, is_output_full); } if (is_output_full(browser, row)) @@ -658,6 +1043,45 @@ out: return row - first_row; } +static int hist_browser__show_callchain(struct hist_browser *browser, + struct hist_entry *entry, int level, + unsigned short row, + print_callchain_entry_fn print, + struct callchain_print_arg *arg, + check_output_full_fn is_output_full) +{ + u64 total = hists__total_period(entry->hists); + u64 parent_total; + int printed; + + if (symbol_conf.cumulate_callchain) + parent_total = entry->stat_acc->period; + else + parent_total = entry->stat.period; + + if (callchain_param.mode == CHAIN_FLAT) { + printed = hist_browser__show_callchain_flat(browser, + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); + } else if (callchain_param.mode == CHAIN_FOLDED) { + printed = hist_browser__show_callchain_folded(browser, + &entry->sorted_chain, row, + total, parent_total, print, arg, + is_output_full); + } else { + printed = hist_browser__show_callchain_graph(browser, + &entry->sorted_chain, level, row, + total, parent_total, print, arg, + is_output_full); + } + + if (arg->is_current_entry) + browser->he_selection = entry; + + return printed; +} + struct hpp_arg { struct ui_browser *b; char folded_sign; @@ -754,7 +1178,6 @@ static int hist_browser__show_entry(struct hist_browser *browser, struct hist_entry *entry, unsigned short row) { - char s[256]; int printed = 0; int width = browser->b.width; char folded_sign = ' '; @@ -779,17 +1202,20 @@ static int hist_browser__show_entry(struct hist_browser *browser, .folded_sign = folded_sign, .current_entry = current_entry, }; - struct perf_hpp hpp = { - .buf = s, - .size = sizeof(s), - .ptr = &arg, - }; int column = 0; hist_browser__gotorc(browser, row, 0); - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) + hists__for_each_format(browser->hists, fmt) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + + if (perf_hpp__should_skip(fmt, entry->hists) || + column++ < browser->b.horiz_scroll) continue; if (current_entry && browser->b.navkeypressed) { @@ -812,11 +1238,18 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (fmt->color) { - width -= fmt->color(fmt, &hpp, entry); + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); } else { - width -= fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry)); ui_browser__printf(&browser->b, "%s", s); } + width -= hpp.buf - s; } /* The scroll bar isn't being used */ @@ -831,31 +1264,246 @@ static int hist_browser__show_entry(struct hist_browser *browser, --row_offset; if (folded_sign == '-' && row != browser->b.rows) { - u64 total = hists__total_period(entry->hists); struct callchain_print_arg arg = { .row_offset = row_offset, .is_current_entry = current_entry, }; - if (callchain_param.mode == CHAIN_GRAPH_REL) { - if (symbol_conf.cumulate_callchain) - total = entry->stat_acc->period; - else - total = entry->stat.period; - } - - printed += hist_browser__show_callchain(browser, - &entry->sorted_chain, 1, row, total, + printed += hist_browser__show_callchain(browser, entry, 1, row, hist_browser__show_callchain_entry, &arg, hist_browser__check_output_full); + } + + return printed; +} + +static int hist_browser__show_hierarchy_entry(struct hist_browser *browser, + struct hist_entry *entry, + unsigned short row, + int level) +{ + int printed = 0; + int width = browser->b.width; + char folded_sign = ' '; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + off_t row_offset = entry->row_offset; + bool first = true; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + struct hpp_arg arg = { + .b = &browser->b, + .current_entry = current_entry, + }; + int column = 0; + int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT; + + if (current_entry) { + browser->he_selection = entry; + browser->selection = &entry->ms; + } + + hist_entry__init_have_children(entry); + folded_sign = hist_entry__folded(entry); + arg.folded_sign = folded_sign; + + if (entry->leaf && row_offset) { + row_offset--; + goto show_callchain; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&entry->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + + if (perf_hpp__should_skip(fmt, entry->hists) || + column++ < browser->b.horiz_scroll) + continue; + + if (current_entry && browser->b.navkeypressed) { + ui_browser__set_color(&browser->b, + HE_COLORSET_SELECTED); + } else { + ui_browser__set_color(&browser->b, + HE_COLORSET_NORMAL); + } + + if (first) { + ui_browser__printf(&browser->b, "%c", folded_sign); + width--; + first = false; + } else { + ui_browser__printf(&browser->b, " "); + width -= 2; + } - if (arg.is_current_entry) - browser->he_selection = entry; + if (fmt->color) { + int ret = fmt->color(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + /* + * fmt->color() already used ui_browser to + * print the non alignment bits, skip it (+ret): + */ + ui_browser__printf(&browser->b, "%s", s + ret); + } else { + int ret = fmt->entry(fmt, &hpp, entry); + hist_entry__snprintf_alignment(entry, &hpp, fmt, ret); + ui_browser__printf(&browser->b, "%s", s); + } + width -= hpp.buf - s; + } + + ui_browser__write_nstring(&browser->b, "", hierarchy_indent); + width -= hierarchy_indent; + + if (column >= browser->b.horiz_scroll) { + char s[2048]; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + .ptr = &arg, + }; + + if (current_entry && browser->b.navkeypressed) { + ui_browser__set_color(&browser->b, + HE_COLORSET_SELECTED); + } else { + ui_browser__set_color(&browser->b, + HE_COLORSET_NORMAL); + } + + perf_hpp_list__for_each_format(entry->hpp_list, fmt) { + ui_browser__write_nstring(&browser->b, "", 2); + width -= 2; + + /* + * No need to call hist_entry__snprintf_alignment() + * since this fmt is always the last column in the + * hierarchy mode. + */ + if (fmt->color) { + width -= fmt->color(fmt, &hpp, entry); + } else { + int i = 0; + + width -= fmt->entry(fmt, &hpp, entry); + ui_browser__printf(&browser->b, "%s", ltrim(s)); + + while (isspace(s[i++])) + width++; + } + } + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + + ++row; + ++printed; + +show_callchain: + if (entry->leaf && folded_sign == '-' && row != browser->b.rows) { + struct callchain_print_arg carg = { + .row_offset = row_offset, + }; + + printed += hist_browser__show_callchain(browser, entry, + level + 1, row, + hist_browser__show_callchain_entry, &carg, + hist_browser__check_output_full); } return printed; } +static int hist_browser__show_no_entry(struct hist_browser *browser, + unsigned short row, int level) +{ + int width = browser->b.width; + bool current_entry = ui_browser__is_current_entry(&browser->b, row); + bool first = true; + int column = 0; + int ret; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + int indent = browser->hists->nr_hpp_node - 2; + + if (current_entry) { + browser->he_selection = NULL; + browser->selection = NULL; + } + + hist_browser__gotorc(browser, row, 0); + + if (current_entry && browser->b.navkeypressed) + ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED); + else + ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL); + + ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT); + width -= level * HIERARCHY_INDENT; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&browser->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, browser->hists) || + column++ < browser->b.horiz_scroll) + continue; + + ret = fmt->width(fmt, NULL, hists_to_evsel(browser->hists)); + + if (first) { + /* for folded sign */ + first = false; + ret++; + } else { + /* space between columns */ + ret += 2; + } + + ui_browser__write_nstring(&browser->b, "", ret); + width -= ret; + } + + ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT); + width -= indent * HIERARCHY_INDENT; + + if (column >= browser->b.horiz_scroll) { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt); + ui_browser__printf(&browser->b, " %s", buf); + width -= ret + 2; + } + + /* The scroll bar isn't being used */ + if (!browser->b.navkeypressed) + width += 1; + + ui_browser__write_nstring(&browser->b, "", width); + return 1; +} + static int advance_hpp_check(struct perf_hpp *hpp, int inc) { advance_hpp(hpp, inc); @@ -879,8 +1527,8 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt) || column++ < browser->b.horiz_scroll) + hists__for_each_format(browser->hists, fmt) { + if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll) continue; ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); @@ -895,11 +1543,96 @@ static int hists_browser__scnprintf_headers(struct hist_browser *browser, char * return ret; } +static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size) +{ + struct hists *hists = browser->hists; + struct perf_hpp dummy_hpp = { + .buf = buf, + .size = size, + }; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + size_t ret = 0; + int column = 0; + int indent = hists->nr_hpp_node - 2; + bool first_node, first_col; + + ret = scnprintf(buf, size, " "); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (column++ < browser->b.horiz_scroll) + continue; + + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " "); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s", + indent * HIERARCHY_INDENT, ""); + if (advance_hpp_check(&dummy_hpp, ret)) + return ret; + + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / "); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + first_node = false; + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + char *start; + + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) { + ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+"); + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + first_col = false; + + ret = fmt->header(fmt, &dummy_hpp, hists_to_evsel(hists)); + dummy_hpp.buf[ret] = '\0'; + rtrim(dummy_hpp.buf); + + start = ltrim(dummy_hpp.buf); + ret = strlen(start); + + if (start != dummy_hpp.buf) + memmove(dummy_hpp.buf, start, ret + 1); + + if (advance_hpp_check(&dummy_hpp, ret)) + break; + } + } + + return ret; +} + static void hist_browser__show_headers(struct hist_browser *browser) { char headers[1024]; - hists_browser__scnprintf_headers(browser, headers, sizeof(headers)); + if (symbol_conf.report_hierarchy) + hists_browser__scnprintf_hierarchy_headers(browser, headers, + sizeof(headers)); + else + hists_browser__scnprintf_headers(browser, headers, + sizeof(headers)); ui_browser__gotorc(&browser->b, 0, 0); ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); @@ -928,19 +1661,37 @@ static unsigned int hist_browser__refresh(struct ui_browser *browser) } ui_browser__hists_init_top(browser); + hb->he_selection = NULL; + hb->selection = NULL; - for (nd = browser->top; nd; nd = rb_next(nd)) { + for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; - if (h->filtered) + if (h->filtered) { + /* let it move to sibling */ + h->unfolded = false; continue; + } percent = hist_entry__get_percent_limit(h); if (percent < hb->min_pcnt) continue; - row += hist_browser__show_entry(hb, h, row); + if (symbol_conf.report_hierarchy) { + row += hist_browser__show_hierarchy_entry(hb, h, row, + h->depth); + if (row == browser->rows) + break; + + if (h->has_no_entry) { + hist_browser__show_no_entry(hb, row, h->depth + 1); + row++; + } + } else { + row += hist_browser__show_entry(hb, h, row); + } + if (row == browser->rows) break; } @@ -958,7 +1709,14 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, if (!h->filtered && percent >= min_pcnt) return nd; - nd = rb_next(nd); + /* + * If it's filtered, its all children also were filtered. + * So move to sibling node. + */ + if (rb_next(nd)) + nd = rb_next(nd); + else + nd = rb_hierarchy_next(nd); } return NULL; @@ -974,7 +1732,7 @@ static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, if (!h->filtered && percent >= min_pcnt) return nd; - nd = rb_prev(nd); + nd = rb_hierarchy_prev(nd); } return NULL; @@ -1004,8 +1762,8 @@ static void ui_browser__hists_seek(struct ui_browser *browser, nd = browser->top; goto do_offset; case SEEK_END: - nd = hists__filter_prev_entries(rb_last(browser->entries), - hb->min_pcnt); + nd = rb_hierarchy_last(rb_last(browser->entries)); + nd = hists__filter_prev_entries(nd, hb->min_pcnt); first = false; break; default: @@ -1033,10 +1791,13 @@ static void ui_browser__hists_seek(struct ui_browser *browser, * and stop when we printed enough lines to fill the screen. */ do_offset: + if (!nd) + return; + if (offset > 0) { do { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) { + if (h->unfolded && h->leaf) { u16 remaining = h->nr_rows - h->row_offset; if (offset > remaining) { offset -= remaining; @@ -1048,7 +1809,8 @@ do_offset: break; } } - nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); + nd = hists__filter_entries(rb_hierarchy_next(nd), + hb->min_pcnt); if (nd == NULL) break; --offset; @@ -1057,7 +1819,7 @@ do_offset: } else if (offset < 0) { while (1) { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) { + if (h->unfolded && h->leaf) { if (first) { if (-offset > h->row_offset) { offset += h->row_offset; @@ -1081,7 +1843,7 @@ do_offset: } } - nd = hists__filter_prev_entries(rb_prev(nd), + nd = hists__filter_prev_entries(rb_hierarchy_prev(nd), hb->min_pcnt); if (nd == NULL) break; @@ -1094,7 +1856,7 @@ do_offset: * row_offset at its last entry. */ h = rb_entry(nd, struct hist_entry, rb_node); - if (h->unfolded) + if (h->unfolded && h->leaf) h->row_offset = h->nr_rows; break; } @@ -1108,17 +1870,14 @@ do_offset: } static int hist_browser__fprintf_callchain(struct hist_browser *browser, - struct hist_entry *he, FILE *fp) + struct hist_entry *he, FILE *fp, + int level) { - u64 total = hists__total_period(he->hists); struct callchain_print_arg arg = { .fp = fp, }; - if (symbol_conf.cumulate_callchain) - total = he->stat_acc->period; - - hist_browser__show_callchain(browser, &he->sorted_chain, 1, 0, total, + hist_browser__show_callchain(browser, he, level, 0, hist_browser__fprintf_callchain_entry, &arg, hist_browser__check_dump_full); return arg.printed; @@ -1144,8 +1903,8 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, if (symbol_conf.use_callchain) printed += fprintf(fp, "%c ", folded_sign); - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_format(browser->hists, fmt) { + if (perf_hpp__should_skip(fmt, he->hists)) continue; if (!first) { @@ -1155,12 +1914,71 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, first = false; ret = fmt->entry(fmt, &hpp, he); + ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret); advance_hpp(&hpp, ret); } - printed += fprintf(fp, "%s\n", rtrim(s)); + printed += fprintf(fp, "%s\n", s); if (folded_sign == '-') - printed += hist_browser__fprintf_callchain(browser, he, fp); + printed += hist_browser__fprintf_callchain(browser, he, fp, 1); + + return printed; +} + + +static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser, + struct hist_entry *he, + FILE *fp, int level) +{ + char s[8192]; + int printed = 0; + char folded_sign = ' '; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + }; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + bool first = true; + int ret; + int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT; + + printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, ""); + + folded_sign = hist_entry__folded(he); + printed += fprintf(fp, "%c", folded_sign); + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&he->hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (!first) { + ret = scnprintf(hpp.buf, hpp.size, " "); + advance_hpp(&hpp, ret); + } else + first = false; + + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + } + + ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, ""); + advance_hpp(&hpp, ret); + + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + ret = scnprintf(hpp.buf, hpp.size, " "); + advance_hpp(&hpp, ret); + + ret = fmt->entry(fmt, &hpp, he); + advance_hpp(&hpp, ret); + } + + printed += fprintf(fp, "%s\n", rtrim(s)); + + if (he->leaf && folded_sign == '-') { + printed += hist_browser__fprintf_callchain(browser, he, fp, + he->depth + 1); + } return printed; } @@ -1174,8 +1992,16 @@ static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - printed += hist_browser__fprintf_entry(browser, h, fp); - nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); + if (symbol_conf.report_hierarchy) { + printed += hist_browser__fprintf_hierarchy_entry(browser, + h, fp, + h->depth); + } else { + printed += hist_browser__fprintf_entry(browser, h, fp); + } + + nd = hists__filter_entries(rb_hierarchy_next(nd), + browser->min_pcnt); } return printed; @@ -1310,11 +2136,18 @@ static int hists__browser_title(struct hists *hists, if (hists->uid_filter_str) printed += snprintf(bf + printed, size - printed, ", UID: %s", hists->uid_filter_str); - if (thread) - printed += scnprintf(bf + printed, size - printed, + if (thread) { + if (sort__has_thread) { + printed += scnprintf(bf + printed, size - printed, ", Thread: %s(%d)", (thread->comm_set ? thread__comm_str(thread) : ""), thread->tid); + } else { + printed += scnprintf(bf + printed, size - printed, + ", Thread: %s", + (thread->comm_set ? thread__comm_str(thread) : "")); + } + } if (dso) printed += scnprintf(bf + printed, size - printed, ", DSO: %s", dso->short_name); @@ -1430,7 +2263,6 @@ close_file_and_continue: struct popup_action { struct thread *thread; - struct dso *dso; struct map_symbol ms; int socket; @@ -1490,15 +2322,24 @@ do_zoom_thread(struct hist_browser *browser, struct popup_action *act) { struct thread *thread = act->thread; + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) + return 0; + if (browser->hists->thread_filter) { pstack__remove(browser->pstack, &browser->hists->thread_filter); perf_hpp__set_elide(HISTC_THREAD, false); thread__zput(browser->hists->thread_filter); ui_helpline__pop(); } else { - ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid); + if (sort__has_thread) { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"", + thread->comm_set ? thread__comm_str(thread) : ""); + } + browser->hists->thread_filter = thread__get(thread); perf_hpp__set_elide(HISTC_THREAD, false); pstack__push(browser->pstack, &browser->hists->thread_filter); @@ -1513,13 +2354,22 @@ static int add_thread_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct thread *thread) { - if (thread == NULL) + int ret; + + if ((!sort__has_thread && !sort__has_comm) || thread == NULL) return 0; - if (asprintf(optstr, "Zoom %s %s(%d) thread", - browser->hists->thread_filter ? "out of" : "into", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid) < 0) + if (sort__has_thread) { + ret = asprintf(optstr, "Zoom %s %s(%d) thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid); + } else { + ret = asprintf(optstr, "Zoom %s %s thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : ""); + } + if (ret < 0) return 0; act->thread = thread; @@ -1532,6 +2382,9 @@ do_zoom_dso(struct hist_browser *browser, struct popup_action *act) { struct map *map = act->ms.map; + if (!sort__has_dso || map == NULL) + return 0; + if (browser->hists->dso_filter) { pstack__remove(browser->pstack, &browser->hists->dso_filter); perf_hpp__set_elide(HISTC_DSO, false); @@ -1556,7 +2409,7 @@ static int add_dso_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Zoom %s %s DSO", @@ -1565,7 +2418,6 @@ add_dso_opt(struct hist_browser *browser, struct popup_action *act, return 0; act->ms.map = map; - act->dso = map->dso; act->fn = do_zoom_dso; return 1; } @@ -1582,7 +2434,7 @@ static int add_map_opt(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, struct map *map) { - if (map == NULL) + if (!sort__has_dso || map == NULL) return 0; if (asprintf(optstr, "Browse map details") < 0) @@ -1684,6 +2536,9 @@ add_exit_opt(struct hist_browser *browser __maybe_unused, static int do_zoom_socket(struct hist_browser *browser, struct popup_action *act) { + if (!sort__has_socket || act->socket < 0) + return 0; + if (browser->hists->socket_filter > -1) { pstack__remove(browser->pstack, &browser->hists->socket_filter); browser->hists->socket_filter = -1; @@ -1703,7 +2558,7 @@ static int add_socket_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, int socket_id) { - if (socket_id < 0) + if (!sort__has_socket || socket_id < 0) return 0; if (asprintf(optstr, "Zoom %s Processor Socket %d", @@ -1721,17 +2576,60 @@ static void hist_browser__update_nr_entries(struct hist_browser *hb) u64 nr_entries = 0; struct rb_node *nd = rb_first(&hb->hists->entries); - if (hb->min_pcnt == 0) { + if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; return; } while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { nr_entries++; - nd = rb_next(nd); + nd = rb_hierarchy_next(nd); } hb->nr_non_filtered_entries = nr_entries; + hb->nr_hierarchy_entries = nr_entries; +} + +static void hist_browser__update_percent_limit(struct hist_browser *hb, + double percent) +{ + struct hist_entry *he; + struct rb_node *nd = rb_first(&hb->hists->entries); + u64 total = hists__total_period(hb->hists); + u64 min_callchain_hits = total * (percent / 100); + + hb->min_pcnt = callchain_param.min_percent = percent; + + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { + he = rb_entry(nd, struct hist_entry, rb_node); + + if (he->has_no_entry) { + he->has_no_entry = false; + he->nr_rows = 0; + } + + if (!he->leaf || !symbol_conf.use_callchain) + goto next; + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + +next: + nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD); + + /* force to re-evaluate folding state of callchains */ + he->init_have_children = false; + hist_entry__set_folding(he, hb, false); + } } static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, @@ -1769,6 +2667,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "E Expand all callchains\n" \ "F Toggle percentage of filtered entries\n" \ "H Display column headers\n" \ + "L Change percent limit\n" \ "m Display context menu\n" \ "S Zoom into current Processor Socket\n" \ @@ -1796,10 +2695,9 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, SLang_reset_tty(); SLang_init_tty(0, 0, 0); - if (min_pcnt) { + if (min_pcnt) browser->min_pcnt = min_pcnt; - hist_browser__update_nr_entries(browser); - } + hist_browser__update_nr_entries(browser); browser->pstack = pstack__new(3); if (browser->pstack == NULL) @@ -1810,7 +2708,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, memset(options, 0, sizeof(options)); memset(actions, 0, sizeof(actions)); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(browser->hists, fmt) { perf_hpp__reset_width(fmt, hists); /* * This is done just once, and activates the horizontal scrolling @@ -1827,7 +2725,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, while (1) { struct thread *thread = NULL; - struct dso *dso = NULL; struct map *map = NULL; int choice = 0; int socked_id = -1; @@ -1839,8 +2736,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); map = browser->selection->map; - if (map) - dso = map->dso; socked_id = browser->he_selection->socket; } switch (key) { @@ -1874,7 +2769,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, hist_browser__dump(browser); continue; case 'd': - actions->dso = dso; + actions->ms.map = map; do_zoom_dso(browser, actions); continue; case 'V': @@ -1928,6 +2823,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, top->zero = !top->zero; } continue; + case 'L': + if (ui_browser__input_window("Percent Limit", + "Please enter the value you want to hide entries under that percent.", + buf, "ENTER: OK, ESC: Cancel", + delay_secs * 2) == K_ENTER) { + char *end; + double new_percent = strtod(buf, &end); + + if (new_percent < 0 || new_percent > 100) { + ui_browser__warning(&browser->b, delay_secs * 2, + "Invalid percent: %.2f", new_percent); + continue; + } + + hist_browser__update_percent_limit(browser, new_percent); + hist_browser__reset(browser); + } + continue; case K_F1: case 'h': case '?': @@ -1999,10 +2912,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } - if (!sort__has_sym) - goto add_exit_option; - - if (browser->selection == NULL) + if (!sort__has_sym || browser->selection == NULL) goto skip_annotation; if (sort__mode == SORT_MODE__BRANCH) { @@ -2042,11 +2952,16 @@ skip_annotation: &options[nr_options], socked_id); /* perf script support */ + if (!is_report_browser(hbt)) + goto skip_scripting; + if (browser->he_selection) { - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - thread, NULL); + if (sort__has_thread && thread) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + thread, NULL); + } /* * Note that browser->selection != NULL * when browser->he_selection is not NULL, @@ -2056,16 +2971,18 @@ skip_annotation: * * See hist_browser__show_entry. */ - nr_options += add_script_opt(browser, - &actions[nr_options], - &options[nr_options], - NULL, browser->selection->sym); + if (sort__has_sym && browser->selection->sym) { + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); + } } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); -add_exit_option: +skip_scripting: nr_options += add_exit_opt(browser, &actions[nr_options], &options[nr_options]); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 4b3585eed1e8..bd9bf7e343b1 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -89,8 +89,8 @@ void perf_gtk__init_hpp(void) perf_gtk__hpp_color_overhead_acc; } -static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, - GtkTreeIter *parent, int col, u64 total) +static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store, + GtkTreeIter *parent, int col, u64 total) { struct rb_node *nd; bool has_single_node = (rb_first(root) == rb_last(root)); @@ -100,13 +100,132 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, struct callchain_list *chain; GtkTreeIter iter, new_parent; bool need_new_parent; - double percent; - u64 hits, child_total; node = rb_entry(nd, struct callchain_node, rb_node); - hits = callchain_cumul_hits(node); - percent = 100.0 * hits / total; + new_parent = *parent; + need_new_parent = !has_single_node; + + callchain_node__make_parent_list(node); + + list_for_each_entry(chain, &node->parent_val, list) { + char buf[128]; + + gtk_tree_store_append(store, &iter, &new_parent); + + callchain_node__scnprintf_value(node, buf, sizeof(buf), total); + gtk_tree_store_set(store, &iter, 0, buf, -1); + + callchain_list__sym_name(chain, buf, sizeof(buf), false); + gtk_tree_store_set(store, &iter, col, buf, -1); + + if (need_new_parent) { + /* + * Only show the top-most symbol in a callchain + * if it's not the only callchain. + */ + new_parent = iter; + need_new_parent = false; + } + } + + list_for_each_entry(chain, &node->val, list) { + char buf[128]; + + gtk_tree_store_append(store, &iter, &new_parent); + + callchain_node__scnprintf_value(node, buf, sizeof(buf), total); + gtk_tree_store_set(store, &iter, 0, buf, -1); + + callchain_list__sym_name(chain, buf, sizeof(buf), false); + gtk_tree_store_set(store, &iter, col, buf, -1); + + if (need_new_parent) { + /* + * Only show the top-most symbol in a callchain + * if it's not the only callchain. + */ + new_parent = iter; + need_new_parent = false; + } + } + } +} + +static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store, + GtkTreeIter *parent, int col, u64 total) +{ + struct rb_node *nd; + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + struct callchain_node *node; + struct callchain_list *chain; + GtkTreeIter iter; + char buf[64]; + char *str, *str_alloc = NULL; + bool first = true; + + node = rb_entry(nd, struct callchain_node, rb_node); + + callchain_node__make_parent_list(node); + + list_for_each_entry(chain, &node->parent_val, list) { + char name[1024]; + + callchain_list__sym_name(chain, name, sizeof(name), false); + + if (asprintf(&str, "%s%s%s", + first ? "" : str_alloc, + first ? "" : symbol_conf.field_sep ?: "; ", + name) < 0) + return; + + first = false; + free(str_alloc); + str_alloc = str; + } + + list_for_each_entry(chain, &node->val, list) { + char name[1024]; + + callchain_list__sym_name(chain, name, sizeof(name), false); + + if (asprintf(&str, "%s%s%s", + first ? "" : str_alloc, + first ? "" : symbol_conf.field_sep ?: "; ", + name) < 0) + return; + + first = false; + free(str_alloc); + str_alloc = str; + } + + gtk_tree_store_append(store, &iter, parent); + + callchain_node__scnprintf_value(node, buf, sizeof(buf), total); + gtk_tree_store_set(store, &iter, 0, buf, -1); + + gtk_tree_store_set(store, &iter, col, str, -1); + + free(str_alloc); + } +} + +static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store, + GtkTreeIter *parent, int col, u64 total) +{ + struct rb_node *nd; + bool has_single_node = (rb_first(root) == rb_last(root)); + + for (nd = rb_first(root); nd; nd = rb_next(nd)) { + struct callchain_node *node; + struct callchain_list *chain; + GtkTreeIter iter, new_parent; + bool need_new_parent; + u64 child_total; + + node = rb_entry(nd, struct callchain_node, rb_node); new_parent = *parent; need_new_parent = !has_single_node && (node->val_nr > 1); @@ -116,7 +235,7 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, gtk_tree_store_append(store, &iter, &new_parent); - scnprintf(buf, sizeof(buf), "%5.2f%%", percent); + callchain_node__scnprintf_value(node, buf, sizeof(buf), total); gtk_tree_store_set(store, &iter, 0, buf, -1); callchain_list__sym_name(chain, buf, sizeof(buf), false); @@ -138,11 +257,22 @@ static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, child_total = total; /* Now 'iter' contains info of the last callchain_list */ - perf_gtk__add_callchain(&node->rb_root, store, &iter, col, - child_total); + perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col, + child_total); } } +static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store, + GtkTreeIter *parent, int col, u64 total) +{ + if (callchain_param.mode == CHAIN_FLAT) + perf_gtk__add_callchain_flat(root, store, parent, col, total); + else if (callchain_param.mode == CHAIN_FOLDED) + perf_gtk__add_callchain_folded(root, store, parent, col, total); + else + perf_gtk__add_callchain_graph(root, store, parent, col, total); +} + static void on_row_activated(GtkTreeView *view, GtkTreePath *path, GtkTreeViewColumn *col __maybe_unused, gpointer user_data __maybe_unused) @@ -176,7 +306,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, nr_cols = 0; - perf_hpp__for_each_format(fmt) + hists__for_each_format(hists, fmt) col_types[nr_cols++] = G_TYPE_STRING; store = gtk_tree_store_newv(nr_cols, col_types); @@ -187,8 +317,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_format(hists, fmt) { + if (perf_hpp__should_skip(fmt, hists)) continue; /* @@ -237,8 +367,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, col_idx = 0; - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_format(hists, fmt) { + if (perf_hpp__should_skip(fmt, h->hists)) continue; if (fmt->color) @@ -266,6 +396,194 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_container_add(GTK_CONTAINER(window), view); } +static void perf_gtk__add_hierarchy_entries(struct hists *hists, + struct rb_root *root, + GtkTreeStore *store, + GtkTreeIter *parent, + struct perf_hpp *hpp, + float min_pcnt) +{ + int col_idx = 0; + struct rb_node *node; + struct hist_entry *he; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + u64 total = hists__total_period(hists); + int size; + + for (node = rb_first(root); node; node = rb_next(node)) { + GtkTreeIter iter; + float percent; + char *bf; + + he = rb_entry(node, struct hist_entry, rb_node); + if (he->filtered) + continue; + + percent = hist_entry__get_percent_limit(he); + if (percent < min_pcnt) + continue; + + gtk_tree_store_append(store, &iter, parent); + + col_idx = 0; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1); + } + + bf = hpp->buf; + size = hpp->size; + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + int ret; + + if (fmt->color) + ret = fmt->color(fmt, hpp, he); + else + ret = fmt->entry(fmt, hpp, he); + + snprintf(hpp->buf + ret, hpp->size - ret, " "); + advance_hpp(hpp, ret + 2); + } + + gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1); + + if (!he->leaf) { + hpp->buf = bf; + hpp->size = size; + + perf_gtk__add_hierarchy_entries(hists, &he->hroot_out, + store, &iter, hpp, + min_pcnt); + + if (!hist_entry__has_hierarchy_children(he, min_pcnt)) { + char buf[32]; + GtkTreeIter child; + + snprintf(buf, sizeof(buf), "no entry >= %.2f%%", + min_pcnt); + + gtk_tree_store_append(store, &child, &iter); + gtk_tree_store_set(store, &child, col_idx, buf, -1); + } + } + + if (symbol_conf.use_callchain && he->leaf) { + if (callchain_param.mode == CHAIN_GRAPH_REL) + total = symbol_conf.cumulate_callchain ? + he->stat_acc->period : he->stat.period; + + perf_gtk__add_callchain(&he->sorted_chain, store, &iter, + col_idx, total); + } + } + +} + +static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists, + float min_pcnt) +{ + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + GType col_types[MAX_COLUMNS]; + GtkCellRenderer *renderer; + GtkTreeStore *store; + GtkWidget *view; + int col_idx; + int nr_cols = 0; + char s[512]; + char buf[512]; + bool first_node, first_col; + struct perf_hpp hpp = { + .buf = s, + .size = sizeof(s), + }; + + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || + perf_hpp__is_dynamic_entry(fmt)) + break; + + col_types[nr_cols++] = G_TYPE_STRING; + } + col_types[nr_cols++] = G_TYPE_STRING; + + store = gtk_tree_store_newv(nr_cols, col_types); + view = gtk_tree_view_new(); + renderer = gtk_cell_renderer_text_new(); + + col_idx = 0; + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, fmt->name, + renderer, "markup", + col_idx++, NULL); + } + + /* construct merged column header since sort keys share single column */ + buf[0] = '\0'; + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) + strcat(buf, " / "); + first_node = false; + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp ,fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + strcat(buf, "+"); + first_col = false; + + fmt->header(fmt, &hpp, hists_to_evsel(hists)); + strcat(buf, ltrim(rtrim(hpp.buf))); + } + } + + gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), + -1, buf, + renderer, "markup", + col_idx++, NULL); + + for (col_idx = 0; col_idx < nr_cols; col_idx++) { + GtkTreeViewColumn *column; + + column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx); + gtk_tree_view_column_set_resizable(column, TRUE); + + if (col_idx == 0) { + gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view), + column); + } + } + + gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); + g_object_unref(GTK_TREE_MODEL(store)); + + perf_gtk__add_hierarchy_entries(hists, &hists->entries, store, + NULL, &hpp, min_pcnt); + + gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE); + + g_signal_connect(view, "row-activated", + G_CALLBACK(on_row_activated), NULL); + gtk_container_add(GTK_CONTAINER(window), view); +} + int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt __maybe_unused, @@ -333,7 +651,10 @@ int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, GTK_POLICY_AUTOMATIC, GTK_POLICY_AUTOMATIC); - perf_gtk__show_hists(scrolled_window, hists, min_pcnt); + if (symbol_conf.report_hierarchy) + perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt); + else + perf_gtk__show_hists(scrolled_window, hists, min_pcnt); tab_label = gtk_label_new(evname); diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 5029ba2b55af..3baeaa6e71b5 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -5,6 +5,7 @@ #include "../util/util.h" #include "../util/sort.h" #include "../util/evsel.h" +#include "../util/evlist.h" /* hist period print (hpp) functions */ @@ -371,7 +372,20 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return 0; } -#define HPP__COLOR_PRINT_FNS(_name, _fn) \ +static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a) +{ + return a->header == hpp__header_fn; +} + +static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b)) + return false; + + return a->idx == b->idx; +} + +#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -381,9 +395,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } -#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn) \ +#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -393,9 +409,11 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } -#define HPP__PRINT_FNS(_name, _fn) \ +#define HPP__PRINT_FNS(_name, _fn, _idx) \ { \ .name = _name, \ .header = hpp__header_fn, \ @@ -404,22 +422,25 @@ static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, .cmp = hpp__nop_cmp, \ .collapse = hpp__nop_cmp, \ .sort = hpp__sort_ ## _fn, \ + .idx = PERF_HPP__ ## _idx, \ + .equal = hpp__equal, \ } struct perf_hpp_fmt perf_hpp__format[] = { - HPP__COLOR_PRINT_FNS("Overhead", overhead), - HPP__COLOR_PRINT_FNS("sys", overhead_sys), - HPP__COLOR_PRINT_FNS("usr", overhead_us), - HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys), - HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us), - HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc), - HPP__PRINT_FNS("Samples", samples), - HPP__PRINT_FNS("Period", period) + HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD), + HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS), + HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US), + HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS), + HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US), + HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC), + HPP__PRINT_FNS("Samples", samples, SAMPLES), + HPP__PRINT_FNS("Period", period, PERIOD) }; -LIST_HEAD(perf_hpp__list); -LIST_HEAD(perf_hpp__sort_list); - +struct perf_hpp_list perf_hpp_list = { + .fields = LIST_HEAD_INIT(perf_hpp_list.fields), + .sorts = LIST_HEAD_INIT(perf_hpp_list.sorts), +}; #undef HPP__COLOR_PRINT_FNS #undef HPP__COLOR_ACC_PRINT_FNS @@ -443,7 +464,6 @@ LIST_HEAD(perf_hpp__sort_list); void perf_hpp__init(void) { - struct list_head *list; int i; for (i = 0; i < PERF_HPP__MAX_INDEX; i++) { @@ -484,22 +504,18 @@ void perf_hpp__init(void) if (symbol_conf.show_total_period) hpp_dimension__add_output(PERF_HPP__PERIOD); +} - /* prepend overhead field for backward compatiblity. */ - list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; - if (list_empty(list)) - list_add(list, &perf_hpp__sort_list); - - if (symbol_conf.cumulate_callchain) { - list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list; - if (list_empty(list)) - list_add(list, &perf_hpp__sort_list); - } +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) +{ + list_add_tail(&format->list, &list->fields); } -void perf_hpp__column_register(struct perf_hpp_fmt *format) +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format) { - list_add_tail(&format->list, &perf_hpp__list); + list_add_tail(&format->sort_list, &list->sorts); } void perf_hpp__column_unregister(struct perf_hpp_fmt *format) @@ -507,53 +523,43 @@ void perf_hpp__column_unregister(struct perf_hpp_fmt *format) list_del(&format->list); } -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) +void perf_hpp__cancel_cumulate(void) { - list_add_tail(&format->sort_list, &perf_hpp__sort_list); -} + struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp; -void perf_hpp__column_enable(unsigned col) -{ - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_register(&perf_hpp__format[col]); -} + if (is_strict_order(field_order)) + return; -void perf_hpp__column_disable(unsigned col) -{ - BUG_ON(col >= PERF_HPP__MAX_INDEX); - perf_hpp__column_unregister(&perf_hpp__format[col]); + ovh = &perf_hpp__format[PERF_HPP__OVERHEAD]; + acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC]; + + perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) { + if (acc->equal(acc, fmt)) { + perf_hpp__column_unregister(fmt); + continue; + } + + if (ovh->equal(ovh, fmt)) + fmt->name = "Overhead"; + } } -void perf_hpp__cancel_cumulate(void) +static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) { - if (is_strict_order(field_order)) - return; - - perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); - perf_hpp__format[PERF_HPP__OVERHEAD].name = "Overhead"; + return a->equal && a->equal(a, b); } -void perf_hpp__setup_output_field(void) +void perf_hpp__setup_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append sort keys to output field */ - perf_hpp__for_each_sort_list(fmt) { - if (!list_empty(&fmt->list)) - continue; + perf_hpp_list__for_each_sort_list(list, fmt) { + struct perf_hpp_fmt *pos; - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. - */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; - - perf_hpp__for_each_format(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) - goto next; - } + perf_hpp_list__for_each_format(list, pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__column_register(fmt); @@ -562,27 +568,17 @@ next: } } -void perf_hpp__append_sort_keys(void) +void perf_hpp__append_sort_keys(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt; /* append output fields to sort keys */ - perf_hpp__for_each_format(fmt) { - if (!list_empty(&fmt->sort_list)) - continue; + perf_hpp_list__for_each_format(list, fmt) { + struct perf_hpp_fmt *pos; - /* - * sort entry fields are dynamically created, - * so they can share a same sort key even though - * the list is empty. - */ - if (perf_hpp__is_sort_entry(fmt)) { - struct perf_hpp_fmt *pos; - - perf_hpp__for_each_sort_list(pos) { - if (perf_hpp__same_sort_entry(pos, fmt)) - goto next; - } + perf_hpp_list__for_each_sort_list(list, pos) { + if (fmt_equal(fmt, pos)) + goto next; } perf_hpp__register_sort_field(fmt); @@ -591,20 +587,29 @@ next: } } -void perf_hpp__reset_output_field(void) + +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + if (fmt->free) + fmt->free(fmt); +} + +void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; /* reset output fields */ - perf_hpp__for_each_format_safe(fmt, tmp) { + perf_hpp_list__for_each_format_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } /* reset sort keys */ - perf_hpp__for_each_sort_list_safe(fmt, tmp) { + perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) { list_del_init(&fmt->list); list_del_init(&fmt->sort_list); + fmt_free(fmt); } } @@ -618,8 +623,8 @@ unsigned int hists__sort_list_width(struct hists *hists) bool first = true; struct perf_hpp dummy_hpp; - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_format(hists, fmt) { + if (perf_hpp__should_skip(fmt, hists)) continue; if (first) @@ -636,22 +641,39 @@ unsigned int hists__sort_list_width(struct hists *hists) return ret; } -void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) +unsigned int hists__overhead_width(struct hists *hists) { - int idx; - - if (perf_hpp__is_sort_entry(fmt)) - return perf_hpp__reset_sort_width(fmt, hists); + struct perf_hpp_fmt *fmt; + int ret = 0; + bool first = true; + struct perf_hpp dummy_hpp; - for (idx = 0; idx < PERF_HPP__MAX_INDEX; idx++) { - if (fmt == &perf_hpp__format[idx]) + hists__for_each_format(hists, fmt) { + if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) break; + + if (first) + first = false; + else + ret += 2; + + ret += fmt->width(fmt, &dummy_hpp, hists_to_evsel(hists)); } - if (idx == PERF_HPP__MAX_INDEX) + return ret; +} + +void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists) +{ + if (perf_hpp__is_sort_entry(fmt)) + return perf_hpp__reset_sort_width(fmt, hists); + + if (perf_hpp__is_dynamic_entry(fmt)) return; - switch (idx) { + BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX); + + switch (fmt->idx) { case PERF_HPP__OVERHEAD: case PERF_HPP__OVERHEAD_SYS: case PERF_HPP__OVERHEAD_US: @@ -679,7 +701,7 @@ void perf_hpp__set_user_width(const char *width_list_str) struct perf_hpp_fmt *fmt; const char *ptr = width_list_str; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { char *p; int len = strtol(ptr, &p, 10); @@ -691,3 +713,71 @@ void perf_hpp__set_user_width(const char *width_list_str) break; } } + +static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt) +{ + struct perf_hpp_list_node *node = NULL; + struct perf_hpp_fmt *fmt_copy; + bool found = false; + bool skip = perf_hpp__should_skip(fmt, hists); + + list_for_each_entry(node, &hists->hpp_formats, list) { + if (node->level == fmt->level) { + found = true; + break; + } + } + + if (!found) { + node = malloc(sizeof(*node)); + if (node == NULL) + return -1; + + node->skip = skip; + node->level = fmt->level; + perf_hpp_list__init(&node->hpp); + + hists->nr_hpp_node++; + list_add_tail(&node->list, &hists->hpp_formats); + } + + fmt_copy = perf_hpp_fmt__dup(fmt); + if (fmt_copy == NULL) + return -1; + + if (!skip) + node->skip = false; + + list_add_tail(&fmt_copy->list, &node->hpp.fields); + list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts); + + return 0; +} + +int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct perf_hpp_fmt *fmt; + struct hists *hists; + int ret; + + if (!symbol_conf.report_hierarchy) + return 0; + + evlist__for_each(evlist, evsel) { + hists = evsel__hists(evsel); + + perf_hpp_list__for_each_sort_list(list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) + continue; + + ret = add_hierarchy_fmt(hists, fmt); + if (ret < 0) + return ret; + } + } + + return 0; +} diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index dfcbc90146ef..7aff5acf3265 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -34,10 +34,10 @@ static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask, return ret; } -static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, +static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, + struct callchain_list *chain, int depth, int depth_mask, int period, - u64 total_samples, u64 hits, - int left_margin) + u64 total_samples, int left_margin) { int i; size_t ret = 0; @@ -50,10 +50,9 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, else ret += fprintf(fp, " "); if (!period && i == depth - 1) { - double percent; - - percent = hits * 100.0 / total_samples; - ret += percent_color_fprintf(fp, "--%2.2f%%-- ", percent); + ret += fprintf(fp, "--"); + ret += callchain_node__fprintf_value(node, fp, total_samples); + ret += fprintf(fp, "--"); } else ret += fprintf(fp, "%s", " "); } @@ -82,13 +81,14 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, int depth_mask, int left_margin) { struct rb_node *node, *next; - struct callchain_node *child; + struct callchain_node *child = NULL; struct callchain_list *chain; int new_depth_mask = depth_mask; u64 remaining; size_t ret = 0; int i; uint entries_printed = 0; + int cumul_count = 0; remaining = total_samples; @@ -100,6 +100,7 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, child = rb_entry(node, struct callchain_node, rb_node); cumul = callchain_cumul_hits(child); remaining -= cumul; + cumul_count += callchain_cumul_counts(child); /* * The depth mask manages the output of pipes that show @@ -120,10 +121,9 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, left_margin); i = 0; list_for_each_entry(chain, &child->val, list) { - ret += ipchain__fprintf_graph(fp, chain, depth, + ret += ipchain__fprintf_graph(fp, child, chain, depth, new_depth_mask, i++, total_samples, - cumul, left_margin); } @@ -143,21 +143,50 @@ static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root, if (callchain_param.mode == CHAIN_GRAPH_REL && remaining && remaining != total_samples) { + struct callchain_node rem_node = { + .hit = remaining, + }; if (!rem_sq_bracket) return ret; + if (callchain_param.value == CCVAL_COUNT && child && child->parent) { + rem_node.count = child->parent->children_count - cumul_count; + if (rem_node.count <= 0) + return ret; + } + new_depth_mask &= ~(1 << (depth - 1)); - ret += ipchain__fprintf_graph(fp, &rem_hits, depth, + ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth, new_depth_mask, 0, total_samples, - remaining, left_margin); + left_margin); } return ret; } +/* + * If have one single callchain root, don't bother printing + * its percentage (100 % in fractal mode and the same percentage + * than the hist in graph mode). This also avoid one level of column. + * + * However when percent-limit applied, it's possible that single callchain + * node have different (non-100% in fractal mode) percentage. + */ +static bool need_percent_display(struct rb_node *node, u64 parent_samples) +{ + struct callchain_node *cnode; + + if (rb_next(node)) + return true; + + cnode = rb_entry(node, struct callchain_node, rb_node); + return callchain_cumul_hits(cnode) != parent_samples; +} + static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, - u64 total_samples, int left_margin) + u64 total_samples, u64 parent_samples, + int left_margin) { struct callchain_node *cnode; struct callchain_list *chain; @@ -168,13 +197,8 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, int ret = 0; char bf[1024]; - /* - * If have one single callchain root, don't bother printing - * its percentage (100 % in fractal mode and the same percentage - * than the hist in graph mode). This also avoid one level of column. - */ node = rb_first(root); - if (node && !rb_next(node)) { + if (node && !need_percent_display(node, parent_samples)) { cnode = rb_entry(node, struct callchain_node, rb_node); list_for_each_entry(chain, &cnode->val, list) { /* @@ -204,9 +228,15 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, root = &cnode->rb_root; } + if (callchain_param.mode == CHAIN_GRAPH_REL) + total_samples = parent_samples; + ret += __callchain__fprintf_graph(fp, root, total_samples, 1, 1, left_margin); - ret += fprintf(fp, "\n"); + if (ret) { + /* do not add a blank line if it printed nothing */ + ret += fprintf(fp, "\n"); + } return ret; } @@ -243,12 +273,11 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree, struct rb_node *rb_node = rb_first(tree); while (rb_node) { - double percent; - chain = rb_entry(rb_node, struct callchain_node, rb_node); - percent = chain->hit * 100.0 / total_samples; - ret = percent_color_fprintf(fp, " %6.2f%%\n", percent); + ret += fprintf(fp, " "); + ret += callchain_node__fprintf_value(chain, fp, total_samples); + ret += fprintf(fp, "\n"); ret += __callchain__fprintf_flat(fp, chain, total_samples); ret += fprintf(fp, "\n"); if (++entries_printed == callchain_param.print_limit) @@ -260,24 +289,81 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree, return ret; } +static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node) +{ + const char *sep = symbol_conf.field_sep ?: ";"; + struct callchain_list *chain; + size_t ret = 0; + char bf[1024]; + bool first; + + if (!node) + return 0; + + ret += __callchain__fprintf_folded(fp, node->parent); + + first = (ret == 0); + list_for_each_entry(chain, &node->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + ret += fprintf(fp, "%s%s", first ? "" : sep, + callchain_list__sym_name(chain, + bf, sizeof(bf), false)); + first = false; + } + + return ret; +} + +static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree, + u64 total_samples) +{ + size_t ret = 0; + u32 entries_printed = 0; + struct callchain_node *chain; + struct rb_node *rb_node = rb_first(tree); + + while (rb_node) { + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + + ret += callchain_node__fprintf_value(chain, fp, total_samples); + ret += fprintf(fp, " "); + ret += __callchain__fprintf_folded(fp, chain); + ret += fprintf(fp, "\n"); + if (++entries_printed == callchain_param.print_limit) + break; + + rb_node = rb_next(rb_node); + } + + return ret; +} + static size_t hist_entry_callchain__fprintf(struct hist_entry *he, u64 total_samples, int left_margin, FILE *fp) { + u64 parent_samples = he->stat.period; + + if (symbol_conf.cumulate_callchain) + parent_samples = he->stat_acc->period; + switch (callchain_param.mode) { case CHAIN_GRAPH_REL: - return callchain__fprintf_graph(fp, &he->sorted_chain, - symbol_conf.cumulate_callchain ? - he->stat_acc->period : he->stat.period, - left_margin); + return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, + parent_samples, left_margin); break; case CHAIN_GRAPH_ABS: return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples, - left_margin); + parent_samples, left_margin); break; case CHAIN_FLAT: return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); break; + case CHAIN_FOLDED: + return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples); + break; case CHAIN_NONE: break; default: @@ -287,45 +373,66 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, return 0; } -static size_t hist_entry__callchain_fprintf(struct hist_entry *he, - struct hists *hists, - FILE *fp) +static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) { - int left_margin = 0; - u64 total_period = hists->stats.total_period; + const char *sep = symbol_conf.field_sep; + struct perf_hpp_fmt *fmt; + char *start = hpp->buf; + int ret; + bool first = true; - if (field_order == NULL && (sort_order == NULL || - !prefixcmp(sort_order, "comm"))) { - struct perf_hpp_fmt *fmt; + if (symbol_conf.exclude_other && !he->parent) + return 0; - perf_hpp__for_each_format(fmt) { - if (!perf_hpp__is_sort_entry(fmt)) - continue; + hists__for_each_format(he->hists, fmt) { + if (perf_hpp__should_skip(fmt, he->hists)) + continue; - /* must be 'comm' sort entry */ - left_margin = fmt->width(fmt, NULL, hists_to_evsel(hists)); - left_margin -= thread__comm_len(he->thread); - break; - } + /* + * If there's no field_sep, we still need + * to display initial ' '. + */ + if (!sep || !first) { + ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " "); + advance_hpp(hpp, ret); + } else + first = false; + + if (perf_hpp__use_color() && fmt->color) + ret = fmt->color(fmt, hpp, he); + else + ret = fmt->entry(fmt, hpp, he); + + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); + advance_hpp(hpp, ret); } - return hist_entry_callchain__fprintf(he, total_period, left_margin, fp); + + return hpp->buf - start; } -static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) +static int hist_entry__hierarchy_fprintf(struct hist_entry *he, + struct perf_hpp *hpp, + struct hists *hists, + FILE *fp) { const char *sep = symbol_conf.field_sep; struct perf_hpp_fmt *fmt; - char *start = hpp->buf; - int ret; + struct perf_hpp_list_node *fmt_node; + char *buf = hpp->buf; + size_t size = hpp->size; + int ret, printed = 0; bool first = true; if (symbol_conf.exclude_other && !he->parent) return 0; - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) - continue; + ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, ""); + advance_hpp(hpp, ret); + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { /* * If there's no field_sep, we still need * to display initial ' '. @@ -341,10 +448,47 @@ static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp) else ret = fmt->entry(fmt, hpp, he); + ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret); advance_hpp(hpp, ret); } - return hpp->buf - start; + if (!sep) + ret = scnprintf(hpp->buf, hpp->size, "%*s", + (hists->nr_hpp_node - 2) * HIERARCHY_INDENT, ""); + advance_hpp(hpp, ret); + + printed += fprintf(fp, "%s", buf); + + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + hpp->buf = buf; + hpp->size = size; + + /* + * No need to call hist_entry__snprintf_alignment() since this + * fmt is always the last column in the hierarchy mode. + */ + if (perf_hpp__use_color() && fmt->color) + fmt->color(fmt, hpp, he); + else + fmt->entry(fmt, hpp, he); + + /* + * dynamic entries are right-aligned but we want left-aligned + * in the hierarchy mode + */ + printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf)); + } + printed += putc('\n', fp); + + if (symbol_conf.use_callchain && he->leaf) { + u64 total = hists__total_period(hists); + + printed += hist_entry_callchain__fprintf(he, total, 0, fp); + goto out; + } + +out: + return printed; } static int hist_entry__fprintf(struct hist_entry *he, size_t size, @@ -356,24 +500,134 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, .buf = bf, .size = size, }; + u64 total_period = hists->stats.total_period; if (size == 0 || size > bfsz) size = hpp.size = bfsz; + if (symbol_conf.report_hierarchy) + return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); + hist_entry__snprintf(he, &hpp); ret = fprintf(fp, "%s\n", bf); if (symbol_conf.use_callchain) - ret += hist_entry__callchain_fprintf(he, hists, fp); + ret += hist_entry_callchain__fprintf(he, total_period, 0, fp); return ret; } +static int print_hierarchy_indent(const char *sep, int indent, + const char *line, FILE *fp) +{ + if (sep != NULL || indent < 2) + return 0; + + return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); +} + +static int print_hierarchy_header(struct hists *hists, struct perf_hpp *hpp, + const char *sep, FILE *fp) +{ + bool first_node, first_col; + int indent; + int depth; + unsigned width = 0; + unsigned header_width = 0; + struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; + + indent = hists->nr_hpp_node; + + /* preserve max indent depth for column headers */ + print_hierarchy_indent(sep, indent, spaces, fp); + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + fmt->header(fmt, hpp, hists_to_evsel(hists)); + fprintf(fp, "%s%s", hpp->buf, sep ?: " "); + } + + /* combine sort headers with ' / ' */ + first_node = true; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + if (!first_node) + header_width += fprintf(fp, " / "); + first_node = false; + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + header_width += fprintf(fp, "+"); + first_col = false; + + fmt->header(fmt, hpp, hists_to_evsel(hists)); + rtrim(hpp->buf); + + header_width += fprintf(fp, "%s", ltrim(hpp->buf)); + } + } + + fprintf(fp, "\n# "); + + /* preserve max indent depth for initial dots */ + print_hierarchy_indent(sep, indent, dots, fp); + + /* the first hpp_list_node is for overhead columns */ + fmt_node = list_first_entry(&hists->hpp_formats, + struct perf_hpp_list_node, list); + + first_col = true; + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (!first_col) + fprintf(fp, "%s", sep ?: ".."); + first_col = false; + + width = fmt->width(fmt, hpp, hists_to_evsel(hists)); + fprintf(fp, "%.*s", width, dots); + } + + depth = 0; + list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) { + first_col = true; + width = depth * HIERARCHY_INDENT; + + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) { + if (perf_hpp__should_skip(fmt, hists)) + continue; + + if (!first_col) + width++; /* for '+' sign between column header */ + first_col = false; + + width += fmt->width(fmt, hpp, hists_to_evsel(hists)); + } + + if (width > header_width) + header_width = width; + + depth++; + } + + fprintf(fp, "%s%-.*s", sep ?: " ", header_width, dots); + + fprintf(fp, "\n#\n"); + + return 2; +} + size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp) { struct perf_hpp_fmt *fmt; + struct perf_hpp_list_node *fmt_node; struct rb_node *nd; size_t ret = 0; unsigned int width; @@ -387,10 +641,11 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, bool first = true; size_t linesz; char *line = NULL; + unsigned indent; init_rem_hits(); - perf_hpp__for_each_format(fmt) + hists__for_each_format(hists, fmt) perf_hpp__reset_width(fmt, hists); if (symbol_conf.col_width_list_str) @@ -401,8 +656,17 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { - if (perf_hpp__should_skip(fmt)) + if (symbol_conf.report_hierarchy) { + list_for_each_entry(fmt_node, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) + perf_hpp__reset_width(fmt, hists); + } + nr_rows += print_hierarchy_header(hists, &dummy_hpp, sep, fp); + goto print_entries; + } + + hists__for_each_format(hists, fmt) { + if (perf_hpp__should_skip(fmt, hists)) continue; if (!first) @@ -425,10 +689,10 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, fprintf(fp, "# "); - perf_hpp__for_each_format(fmt) { + hists__for_each_format(hists, fmt) { unsigned int i; - if (perf_hpp__should_skip(fmt)) + if (perf_hpp__should_skip(fmt, hists)) continue; if (!first) @@ -458,7 +722,9 @@ print_entries: goto out; } - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + indent = hists__overhead_width(hists) + 4; + + for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); float percent; @@ -474,6 +740,20 @@ print_entries: if (max_rows && ++nr_rows >= max_rows) break; + /* + * If all children are filtered out or percent-limited, + * display "no entry >= x.xx%" message. + */ + if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { + int depth = hists->nr_hpp_node + h->depth + 1; + + print_hierarchy_indent(sep, depth, spaces, fp); + fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); + + if (max_rows && ++nr_rows >= max_rows) + break; + } + if (h->ms.map == NULL && verbose > 1) { __map_groups__fprintf_maps(h->thread->mg, MAP__FUNCTION, fp); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 591b3fe3ed49..eea25e2424e9 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,24 +6,20 @@ libperf-y += config.o libperf-y += ctype.o libperf-y += db-export.o libperf-y += env.o -libperf-y += environment.o libperf-y += event.o libperf-y += evlist.o libperf-y += evsel.o -libperf-y += exec_cmd.o -libperf-y += find_next_bit.o -libperf-y += help.o +libperf-y += find_bit.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o -libperf-y += parse-options.o libperf-y += parse-events.o libperf-y += perf_regs.o libperf-y += path.o libperf-y += rbtree.o +libperf-y += libstring.o libperf-y += bitmap.o libperf-y += hweight.o -libperf-y += run-command.o libperf-y += quote.o libperf-y += strbuf.o libperf-y += string.o @@ -32,11 +28,9 @@ libperf-y += strfilter.o libperf-y += top.o libperf-y += usage.o libperf-y += wrapper.o -libperf-y += sigchain.o libperf-y += dso.o libperf-y += symbol.o libperf-y += color.o -libperf-y += pager.o libperf-y += header.o libperf-y += callchain.o libperf-y += values.o @@ -86,8 +80,12 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-y += parse-branch-options.o libperf-y += parse-regs-options.o +libperf-y += term.o +libperf-y += help-unknown-cmd.o +libperf-y += mem-events.o libperf-$(CONFIG_LIBBPF) += bpf-loader.o +libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o libperf-$(CONFIG_LIBELF) += symbol-elf.o libperf-$(CONFIG_LIBELF) += probe-file.o libperf-$(CONFIG_LIBELF) += probe-event.o @@ -108,9 +106,17 @@ libperf-y += scripting-engines/ libperf-$(CONFIG_ZLIB) += zlib.o libperf-$(CONFIG_LZMA) += lzma.o +libperf-y += demangle-java.o + +ifdef CONFIG_JITDUMP +libperf-$(CONFIG_LIBELF) += jitdump.o +libperf-$(CONFIG_LIBELF) += genelf.o +libperf-$(CONFIG_LIBELF) += genelf_debug.o +endif CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" +# avoid compiler warnings in 32-bit mode +CFLAGS_genelf_debug.o += -Wno-packed $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c $(call rule_mkdir) @@ -136,8 +142,10 @@ CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c -CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" CFLAGS_parse-events.o += -Wno-redundant-decls @@ -145,7 +153,11 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE +$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) @@ -153,6 +165,10 @@ $(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) +$(OUTPUT)util/libstring.o: ../lib/string.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0fc8d7a2fea5..b795b6994144 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -65,6 +65,11 @@ static int call__parse(struct ins_operands *ops) name++; +#ifdef __arm__ + if (strchr(name, '+')) + return -1; +#endif + tok = strchr(name, '>'); if (tok == NULL) return -1; @@ -246,7 +251,11 @@ static int mov__parse(struct ins_operands *ops) return -1; target = ++s; +#ifdef __arm__ + comment = strchr(s, ';'); +#else comment = strchr(s, '#'); +#endif if (comment != NULL) s = comment - 1; @@ -354,6 +363,20 @@ static struct ins instructions[] = { { .name = "addq", .ops = &mov_ops, }, { .name = "addw", .ops = &mov_ops, }, { .name = "and", .ops = &mov_ops, }, +#ifdef __arm__ + { .name = "b", .ops = &jump_ops, }, // might also be a call + { .name = "bcc", .ops = &jump_ops, }, + { .name = "bcs", .ops = &jump_ops, }, + { .name = "beq", .ops = &jump_ops, }, + { .name = "bge", .ops = &jump_ops, }, + { .name = "bgt", .ops = &jump_ops, }, + { .name = "bhi", .ops = &jump_ops, }, + { .name = "bl", .ops = &call_ops, }, + { .name = "blt", .ops = &jump_ops, }, + { .name = "bls", .ops = &jump_ops, }, + { .name = "blx", .ops = &call_ops, }, + { .name = "bne", .ops = &jump_ops, }, +#endif { .name = "bts", .ops = &mov_ops, }, { .name = "call", .ops = &call_ops, }, { .name = "callq", .ops = &call_ops, }, @@ -1084,6 +1107,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize) struct kcore_extract kce; bool delete_extract = false; int lineno = 0; + int nline; if (filename) symbol__join_symfs(symfs_filename, filename); @@ -1179,6 +1203,9 @@ fallback: ret = decompress_to_file(m.ext, symfs_filename, fd); + if (ret) + pr_err("Cannot decompress %s %s\n", m.ext, symfs_filename); + free(m.ext); close(fd); @@ -1204,13 +1231,25 @@ fallback: pr_debug("Executing: %s\n", command); file = popen(command, "r"); - if (!file) + if (!file) { + pr_err("Failure running %s\n", command); + /* + * If we were using debug info should retry with + * original binary. + */ goto out_remove_tmp; + } - while (!feof(file)) + nline = 0; + while (!feof(file)) { if (symbol__parse_objdump_line(sym, map, file, privsize, &lineno) < 0) break; + nline++; + } + + if (nline == 0) + pr_err("No output from %s\n", command); /* * kallsyms does not have symbol sizes so there may a nop at the end. @@ -1604,6 +1643,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, len = symbol__size(sym); if (print_lines) { + srcline_full_filename = full_paths; symbol__get_source_line(sym, map, evsel, &source_line, len); print_summary(&source_line, dso->long_name); } diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 7f10430af39c..ec164fe70718 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -45,7 +45,7 @@ #include "event.h" #include "session.h" #include "debug.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include "intel-pt.h" #include "intel-bts.h" @@ -478,10 +478,11 @@ void auxtrace_heap__pop(struct auxtrace_heap *heap) heap_array[last].ordinal); } -size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr) +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist) { if (itr) - return itr->info_priv_size(itr); + return itr->info_priv_size(itr, evlist); return 0; } @@ -852,7 +853,7 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, int err; pr_debug2("Synthesizing auxtrace information\n"); - priv_size = auxtrace_record__info_priv_size(itr); + priv_size = auxtrace_record__info_priv_size(itr, session->evlist); ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size); if (!ev) return -ENOMEM; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index b86f90db1352..e5a8e2d4f2af 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -293,7 +293,8 @@ struct auxtrace_record { int (*recording_options)(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); - size_t (*info_priv_size)(struct auxtrace_record *itr); + size_t (*info_priv_size)(struct auxtrace_record *itr, + struct perf_evlist *evlist); int (*info_fill)(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, @@ -429,7 +430,8 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, int auxtrace_record__options(struct auxtrace_record *itr, struct perf_evlist *evlist, struct record_opts *opts); -size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr); +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr, + struct perf_evlist *evlist); int auxtrace_record__info_fill(struct auxtrace_record *itr, struct perf_session *session, struct auxtrace_info_event *auxtrace_info, diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c deleted file mode 100644 index 0a1adc1111fd..000000000000 --- a/tools/perf/util/bitmap.c +++ /dev/null @@ -1,31 +0,0 @@ -/* - * From lib/bitmap.c - * Helper functions for bitmap.h. - * - * This source code is licensed under the GNU General Public License, - * Version 2. See the file COPYING for more details. - */ -#include <linux/bitmap.h> - -int __bitmap_weight(const unsigned long *bitmap, int bits) -{ - int k, w = 0, lim = bits/BITS_PER_LONG; - - for (k = 0; k < lim; k++) - w += hweight_long(bitmap[k]); - - if (bits % BITS_PER_LONG) - w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits)); - - return w; -} - -void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits) -{ - int k; - int nr = BITS_TO_LONGS(bits); - - for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] | bitmap2[k]; -} diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index ba6f7526b282..0967ce601931 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -5,13 +5,19 @@ * Copyright (C) 2015 Huawei Inc. */ +#include <linux/bpf.h> #include <bpf/libbpf.h> +#include <bpf/bpf.h> #include <linux/err.h> +#include <linux/string.h> #include "perf.h" #include "debug.h" #include "bpf-loader.h" +#include "bpf-prologue.h" +#include "llvm-utils.h" #include "probe-event.h" #include "probe-finder.h" // for MAX_PROBES +#include "parse-events.h" #include "llvm-utils.h" #define DEFINE_PRINT_FN(name, level) \ @@ -26,18 +32,44 @@ static int libbpf_##name(const char *fmt, ...) \ return ret; \ } -DEFINE_PRINT_FN(warning, 0) -DEFINE_PRINT_FN(info, 0) +DEFINE_PRINT_FN(warning, 1) +DEFINE_PRINT_FN(info, 1) DEFINE_PRINT_FN(debug, 1) struct bpf_prog_priv { struct perf_probe_event pev; + bool need_prologue; + struct bpf_insn *insns_buf; + int nr_types; + int *type_mapping; }; +static bool libbpf_initialized; + +struct bpf_object * +bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name) +{ + struct bpf_object *obj; + + if (!libbpf_initialized) { + libbpf_set_print(libbpf_warning, + libbpf_info, + libbpf_debug); + libbpf_initialized = true; + } + + obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name); + if (IS_ERR(obj)) { + pr_debug("bpf: failed to load buffer\n"); + return ERR_PTR(-EINVAL); + } + + return obj; +} + struct bpf_object *bpf__prepare_load(const char *filename, bool source) { struct bpf_object *obj; - static bool libbpf_initialized; if (!libbpf_initialized) { libbpf_set_print(libbpf_warning, @@ -53,15 +85,15 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); if (err) - return ERR_PTR(err); + return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename); free(obj_buf); } else obj = bpf_object__open(filename); - if (!obj) { + if (IS_ERR(obj)) { pr_debug("bpf: failed to load %s\n", filename); - return ERR_PTR(-EINVAL); + return obj; } return obj; @@ -78,16 +110,184 @@ void bpf__clear(void) } static void -bpf_prog_priv__clear(struct bpf_program *prog __maybe_unused, - void *_priv) +clear_prog_priv(struct bpf_program *prog __maybe_unused, + void *_priv) { struct bpf_prog_priv *priv = _priv; cleanup_perf_probe_events(&priv->pev, 1); + zfree(&priv->insns_buf); + zfree(&priv->type_mapping); free(priv); } static int +prog_config__exec(const char *value, struct perf_probe_event *pev) +{ + pev->uprobes = true; + pev->target = strdup(value); + if (!pev->target) + return -ENOMEM; + return 0; +} + +static int +prog_config__module(const char *value, struct perf_probe_event *pev) +{ + pev->uprobes = false; + pev->target = strdup(value); + if (!pev->target) + return -ENOMEM; + return 0; +} + +static int +prog_config__bool(const char *value, bool *pbool, bool invert) +{ + int err; + bool bool_value; + + if (!pbool) + return -EINVAL; + + err = strtobool(value, &bool_value); + if (err) + return err; + + *pbool = invert ? !bool_value : bool_value; + return 0; +} + +static int +prog_config__inlines(const char *value, + struct perf_probe_event *pev __maybe_unused) +{ + return prog_config__bool(value, &probe_conf.no_inlines, true); +} + +static int +prog_config__force(const char *value, + struct perf_probe_event *pev __maybe_unused) +{ + return prog_config__bool(value, &probe_conf.force_add, false); +} + +static struct { + const char *key; + const char *usage; + const char *desc; + int (*func)(const char *, struct perf_probe_event *); +} bpf_prog_config_terms[] = { + { + .key = "exec", + .usage = "exec=<full path of file>", + .desc = "Set uprobe target", + .func = prog_config__exec, + }, + { + .key = "module", + .usage = "module=<module name> ", + .desc = "Set kprobe module", + .func = prog_config__module, + }, + { + .key = "inlines", + .usage = "inlines=[yes|no] ", + .desc = "Probe at inline symbol", + .func = prog_config__inlines, + }, + { + .key = "force", + .usage = "force=[yes|no] ", + .desc = "Forcibly add events with existing name", + .func = prog_config__force, + }, +}; + +static int +do_prog_config(const char *key, const char *value, + struct perf_probe_event *pev) +{ + unsigned int i; + + pr_debug("config bpf program: %s=%s\n", key, value); + for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++) + if (strcmp(key, bpf_prog_config_terms[i].key) == 0) + return bpf_prog_config_terms[i].func(value, pev); + + pr_debug("BPF: ERROR: invalid program config option: %s=%s\n", + key, value); + + pr_debug("\nHint: Valid options are:\n"); + for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++) + pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage, + bpf_prog_config_terms[i].desc); + pr_debug("\n"); + + return -BPF_LOADER_ERRNO__PROGCONF_TERM; +} + +static const char * +parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev) +{ + char *text = strdup(config_str); + char *sep, *line; + const char *main_str = NULL; + int err = 0; + + if (!text) { + pr_debug("No enough memory: dup config_str failed\n"); + return ERR_PTR(-ENOMEM); + } + + line = text; + while ((sep = strchr(line, ';'))) { + char *equ; + + *sep = '\0'; + equ = strchr(line, '='); + if (!equ) { + pr_warning("WARNING: invalid config in BPF object: %s\n", + line); + pr_warning("\tShould be 'key=value'.\n"); + goto nextline; + } + *equ = '\0'; + + err = do_prog_config(line, equ + 1, pev); + if (err) + break; +nextline: + line = sep + 1; + } + + if (!err) + main_str = config_str + (line - text); + free(text); + + return err ? ERR_PTR(err) : main_str; +} + +static int +parse_prog_config(const char *config_str, struct perf_probe_event *pev) +{ + int err; + const char *main_str = parse_prog_config_kvpair(config_str, pev); + + if (IS_ERR(main_str)) + return PTR_ERR(main_str); + + err = parse_perf_probe_command(main_str, pev); + if (err < 0) { + pr_debug("bpf: '%s' is not a valid config string\n", + config_str); + /* parse failed, don't need clear pev. */ + return -BPF_LOADER_ERRNO__CONFIG; + } + return 0; +} + +static int config_bpf_program(struct bpf_program *prog) { struct perf_probe_event *pev = NULL; @@ -95,10 +295,14 @@ config_bpf_program(struct bpf_program *prog) const char *config_str; int err; + /* Initialize per-program probing setting */ + probe_conf.no_inlines = false; + probe_conf.force_add = false; + config_str = bpf_program__title(prog, false); - if (!config_str) { + if (IS_ERR(config_str)) { pr_debug("bpf: unable to get title for program\n"); - return -EINVAL; + return PTR_ERR(config_str); } priv = calloc(sizeof(*priv), 1); @@ -109,18 +313,14 @@ config_bpf_program(struct bpf_program *prog) pev = &priv->pev; pr_debug("bpf: config program '%s'\n", config_str); - err = parse_perf_probe_command(config_str, pev); - if (err < 0) { - pr_debug("bpf: '%s' is not a valid config string\n", - config_str); - err = -EINVAL; + err = parse_prog_config(config_str, pev); + if (err) goto errout; - } if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) { pr_debug("bpf: '%s': group for event is set and not '%s'.\n", config_str, PERF_BPF_PROBE_GROUP); - err = -EINVAL; + err = -BPF_LOADER_ERRNO__GROUP; goto errout; } else if (!pev->group) pev->group = strdup(PERF_BPF_PROBE_GROUP); @@ -132,14 +332,14 @@ config_bpf_program(struct bpf_program *prog) } if (!pev->event) { - pr_debug("bpf: '%s': event name is missing\n", + pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n", config_str); - err = -EINVAL; + err = -BPF_LOADER_ERRNO__EVENTNAME; goto errout; } pr_debug("bpf: config '%s' is ok\n", config_str); - err = bpf_program__set_private(prog, priv, bpf_prog_priv__clear); + err = bpf_program__set_private(prog, priv, clear_prog_priv); if (err) { pr_debug("Failed to set priv for program '%s'\n", config_str); goto errout; @@ -175,6 +375,220 @@ static int bpf__prepare_probe(void) return err; } +static int +preproc_gen_prologue(struct bpf_program *prog, int n, + struct bpf_insn *orig_insns, int orig_insns_cnt, + struct bpf_prog_prep_result *res) +{ + struct probe_trace_event *tev; + struct perf_probe_event *pev; + struct bpf_prog_priv *priv; + struct bpf_insn *buf; + size_t prologue_cnt = 0; + int i, err; + + err = bpf_program__get_private(prog, (void **)&priv); + if (err || !priv) + goto errout; + + pev = &priv->pev; + + if (n < 0 || n >= priv->nr_types) + goto errout; + + /* Find a tev belongs to that type */ + for (i = 0; i < pev->ntevs; i++) { + if (priv->type_mapping[i] == n) + break; + } + + if (i >= pev->ntevs) { + pr_debug("Internal error: prologue type %d not found\n", n); + return -BPF_LOADER_ERRNO__PROLOGUE; + } + + tev = &pev->tevs[i]; + + buf = priv->insns_buf; + err = bpf__gen_prologue(tev->args, tev->nargs, + buf, &prologue_cnt, + BPF_MAXINSNS - orig_insns_cnt); + if (err) { + const char *title; + + title = bpf_program__title(prog, false); + if (!title) + title = "[unknown]"; + + pr_debug("Failed to generate prologue for program %s\n", + title); + return err; + } + + memcpy(&buf[prologue_cnt], orig_insns, + sizeof(struct bpf_insn) * orig_insns_cnt); + + res->new_insn_ptr = buf; + res->new_insn_cnt = prologue_cnt + orig_insns_cnt; + res->pfd = NULL; + return 0; + +errout: + pr_debug("Internal error in preproc_gen_prologue\n"); + return -BPF_LOADER_ERRNO__PROLOGUE; +} + +/* + * compare_tev_args is reflexive, transitive and antisymmetric. + * I can proof it but this margin is too narrow to contain. + */ +static int compare_tev_args(const void *ptev1, const void *ptev2) +{ + int i, ret; + const struct probe_trace_event *tev1 = + *(const struct probe_trace_event **)ptev1; + const struct probe_trace_event *tev2 = + *(const struct probe_trace_event **)ptev2; + + ret = tev2->nargs - tev1->nargs; + if (ret) + return ret; + + for (i = 0; i < tev1->nargs; i++) { + struct probe_trace_arg *arg1, *arg2; + struct probe_trace_arg_ref *ref1, *ref2; + + arg1 = &tev1->args[i]; + arg2 = &tev2->args[i]; + + ret = strcmp(arg1->value, arg2->value); + if (ret) + return ret; + + ref1 = arg1->ref; + ref2 = arg2->ref; + + while (ref1 && ref2) { + ret = ref2->offset - ref1->offset; + if (ret) + return ret; + + ref1 = ref1->next; + ref2 = ref2->next; + } + + if (ref1 || ref2) + return ref2 ? 1 : -1; + } + + return 0; +} + +/* + * Assign a type number to each tevs in a pev. + * mapping is an array with same slots as tevs in that pev. + * nr_types will be set to number of types. + */ +static int map_prologue(struct perf_probe_event *pev, int *mapping, + int *nr_types) +{ + int i, type = 0; + struct probe_trace_event **ptevs; + + size_t array_sz = sizeof(*ptevs) * pev->ntevs; + + ptevs = malloc(array_sz); + if (!ptevs) { + pr_debug("No ehough memory: alloc ptevs failed\n"); + return -ENOMEM; + } + + pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs); + for (i = 0; i < pev->ntevs; i++) + ptevs[i] = &pev->tevs[i]; + + qsort(ptevs, pev->ntevs, sizeof(*ptevs), + compare_tev_args); + + for (i = 0; i < pev->ntevs; i++) { + int n; + + n = ptevs[i] - pev->tevs; + if (i == 0) { + mapping[n] = type; + pr_debug("mapping[%d]=%d\n", n, type); + continue; + } + + if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0) + mapping[n] = type; + else + mapping[n] = ++type; + + pr_debug("mapping[%d]=%d\n", n, mapping[n]); + } + free(ptevs); + *nr_types = type + 1; + + return 0; +} + +static int hook_load_preprocessor(struct bpf_program *prog) +{ + struct perf_probe_event *pev; + struct bpf_prog_priv *priv; + bool need_prologue = false; + int err, i; + + err = bpf_program__get_private(prog, (void **)&priv); + if (err || !priv) { + pr_debug("Internal error when hook preprocessor\n"); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + pev = &priv->pev; + for (i = 0; i < pev->ntevs; i++) { + struct probe_trace_event *tev = &pev->tevs[i]; + + if (tev->nargs > 0) { + need_prologue = true; + break; + } + } + + /* + * Since all tevs don't have argument, we don't need generate + * prologue. + */ + if (!need_prologue) { + priv->need_prologue = false; + return 0; + } + + priv->need_prologue = true; + priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS); + if (!priv->insns_buf) { + pr_debug("No enough memory: alloc insns_buf failed\n"); + return -ENOMEM; + } + + priv->type_mapping = malloc(sizeof(int) * pev->ntevs); + if (!priv->type_mapping) { + pr_debug("No enough memory: alloc type_mapping failed\n"); + return -ENOMEM; + } + memset(priv->type_mapping, -1, + sizeof(int) * pev->ntevs); + + err = map_prologue(pev, priv->type_mapping, &priv->nr_types); + if (err) + return err; + + err = bpf_program__set_prep(prog, priv->nr_types, + preproc_gen_prologue); + return err; +} + int bpf__probe(struct bpf_object *obj) { int err = 0; @@ -209,6 +623,18 @@ int bpf__probe(struct bpf_object *obj) pr_debug("bpf_probe: failed to apply perf probe events"); goto out; } + + /* + * After probing, let's consider prologue, which + * adds program fetcher to BPF programs. + * + * hook_load_preprocessorr() hooks pre-processor + * to bpf_program, let it generate prologue + * dynamically during loading. + */ + err = hook_load_preprocessor(prog); + if (err) + goto out; } out: return err < 0 ? err : 0; @@ -285,14 +711,21 @@ int bpf__foreach_tev(struct bpf_object *obj, (void **)&priv); if (err || !priv) { pr_debug("bpf: failed to get private field\n"); - return -EINVAL; + return -BPF_LOADER_ERRNO__INTERNAL; } pev = &priv->pev; for (i = 0; i < pev->ntevs; i++) { tev = &pev->tevs[i]; - fd = bpf_program__fd(prog); + if (priv->need_prologue) { + int type = priv->type_mapping[i]; + + fd = bpf_program__nth_fd(prog, type); + } else { + fd = bpf_program__fd(prog); + } + if (fd < 0) { pr_debug("bpf: failed to get file descriptor\n"); return fd; @@ -308,13 +741,751 @@ int bpf__foreach_tev(struct bpf_object *obj, return 0; } +enum bpf_map_op_type { + BPF_MAP_OP_SET_VALUE, + BPF_MAP_OP_SET_EVSEL, +}; + +enum bpf_map_key_type { + BPF_MAP_KEY_ALL, + BPF_MAP_KEY_RANGES, +}; + +struct bpf_map_op { + struct list_head list; + enum bpf_map_op_type op_type; + enum bpf_map_key_type key_type; + union { + struct parse_events_array array; + } k; + union { + u64 value; + struct perf_evsel *evsel; + } v; +}; + +struct bpf_map_priv { + struct list_head ops_list; +}; + +static void +bpf_map_op__delete(struct bpf_map_op *op) +{ + if (!list_empty(&op->list)) + list_del(&op->list); + if (op->key_type == BPF_MAP_KEY_RANGES) + parse_events__clear_array(&op->k.array); + free(op); +} + +static void +bpf_map_priv__purge(struct bpf_map_priv *priv) +{ + struct bpf_map_op *pos, *n; + + list_for_each_entry_safe(pos, n, &priv->ops_list, list) { + list_del_init(&pos->list); + bpf_map_op__delete(pos); + } +} + +static void +bpf_map_priv__clear(struct bpf_map *map __maybe_unused, + void *_priv) +{ + struct bpf_map_priv *priv = _priv; + + bpf_map_priv__purge(priv); + free(priv); +} + +static int +bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term) +{ + op->key_type = BPF_MAP_KEY_ALL; + if (!term) + return 0; + + if (term->array.nr_ranges) { + size_t memsz = term->array.nr_ranges * + sizeof(op->k.array.ranges[0]); + + op->k.array.ranges = memdup(term->array.ranges, memsz); + if (!op->k.array.ranges) { + pr_debug("No enough memory to alloc indices for map\n"); + return -ENOMEM; + } + op->key_type = BPF_MAP_KEY_RANGES; + op->k.array.nr_ranges = term->array.nr_ranges; + } + return 0; +} + +static struct bpf_map_op * +bpf_map_op__new(struct parse_events_term *term) +{ + struct bpf_map_op *op; + int err; + + op = zalloc(sizeof(*op)); + if (!op) { + pr_debug("Failed to alloc bpf_map_op\n"); + return ERR_PTR(-ENOMEM); + } + INIT_LIST_HEAD(&op->list); + + err = bpf_map_op_setkey(op, term); + if (err) { + free(op); + return ERR_PTR(err); + } + return op; +} + +static int +bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op) +{ + struct bpf_map_priv *priv; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + err = bpf_map__get_private(map, (void **)&priv); + if (err) { + pr_debug("Failed to get private from map %s\n", map_name); + return err; + } + + if (!priv) { + priv = zalloc(sizeof(*priv)); + if (!priv) { + pr_debug("No enough memory to alloc map private\n"); + return -ENOMEM; + } + INIT_LIST_HEAD(&priv->ops_list); + + if (bpf_map__set_private(map, priv, bpf_map_priv__clear)) { + free(priv); + return -BPF_LOADER_ERRNO__INTERNAL; + } + } + + list_add_tail(&op->list, &priv->ops_list); + return 0; +} + +static struct bpf_map_op * +bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term) +{ + struct bpf_map_op *op; + int err; + + op = bpf_map_op__new(term); + if (IS_ERR(op)) + return op; + + err = bpf_map__add_op(map, op); + if (err) { + bpf_map_op__delete(op); + return ERR_PTR(err); + } + return op; +} + +static int +__bpf_map__config_value(struct bpf_map *map, + struct parse_events_term *term) +{ + struct bpf_map_def def; + struct bpf_map_op *op; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("Unable to get map definition from '%s'\n", + map_name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (def.type != BPF_MAP_TYPE_ARRAY) { + pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", + map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + if (def.key_size < sizeof(unsigned int)) { + pr_debug("Map %s has incorrect key size\n", map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; + } + switch (def.value_size) { + case 1: + case 2: + case 4: + case 8: + break; + default: + pr_debug("Map %s has incorrect value size\n", map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; + } + + op = bpf_map__add_newop(map, term); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_VALUE; + op->v.value = term->val.num; + return 0; +} + +static int +bpf_map__config_value(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist __maybe_unused) +{ + if (!term->err_val) { + pr_debug("Config value not set\n"); + return -BPF_LOADER_ERRNO__OBJCONF_CONF; + } + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) { + pr_debug("ERROR: wrong value type for 'value'\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; + } + + return __bpf_map__config_value(map, term); +} + +static int +__bpf_map__config_event(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + struct bpf_map_def def; + struct bpf_map_op *op; + const char *map_name; + int err; + + map_name = bpf_map__get_name(map); + evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str); + if (!evsel) { + pr_debug("Event (for '%s') '%s' doesn't exist\n", + map_name, term->val.str); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("Unable to get map definition from '%s'\n", + map_name); + return err; + } + + /* + * No need to check key_size and value_size: + * kernel has already checked them. + */ + if (def.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { + pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", + map_name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + + op = bpf_map__add_newop(map, term); + if (IS_ERR(op)) + return PTR_ERR(op); + op->op_type = BPF_MAP_OP_SET_EVSEL; + op->v.evsel = evsel; + return 0; +} + +static int +bpf_map__config_event(struct bpf_map *map, + struct parse_events_term *term, + struct perf_evlist *evlist) +{ + if (!term->err_val) { + pr_debug("Config value not set\n"); + return -BPF_LOADER_ERRNO__OBJCONF_CONF; + } + + if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) { + pr_debug("ERROR: wrong value type for 'event'\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE; + } + + return __bpf_map__config_event(map, term, evlist); +} + +struct bpf_obj_config__map_func { + const char *config_opt; + int (*config_func)(struct bpf_map *, struct parse_events_term *, + struct perf_evlist *); +}; + +struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = { + {"value", bpf_map__config_value}, + {"event", bpf_map__config_event}, +}; + +static int +config_map_indices_range_check(struct parse_events_term *term, + struct bpf_map *map, + const char *map_name) +{ + struct parse_events_array *array = &term->array; + struct bpf_map_def def; + unsigned int i; + int err; + + if (!array->nr_ranges) + return 0; + if (!array->ranges) { + pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n", + map_name, (int)array->nr_ranges); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("ERROR: Unable to get map definition from '%s'\n", + map_name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + for (i = 0; i < array->nr_ranges; i++) { + unsigned int start = array->ranges[i].start; + size_t length = array->ranges[i].length; + unsigned int idx = start + length - 1; + + if (idx >= def.max_entries) { + pr_debug("ERROR: index %d too large\n", idx); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; + } + } + return 0; +} + +static int +bpf__obj_config_map(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *key_scan_pos) +{ + /* key is "map:<mapname>.<config opt>" */ + char *map_name = strdup(term->config + sizeof("map:") - 1); + struct bpf_map *map; + int err = -BPF_LOADER_ERRNO__OBJCONF_OPT; + char *map_opt; + size_t i; + + if (!map_name) + return -ENOMEM; + + map_opt = strchr(map_name, '.'); + if (!map_opt) { + pr_debug("ERROR: Invalid map config: %s\n", map_name); + goto out; + } + + *map_opt++ = '\0'; + if (*map_opt == '\0') { + pr_debug("ERROR: Invalid map option: %s\n", term->config); + goto out; + } + + map = bpf_object__get_map_by_name(obj, map_name); + if (!map) { + pr_debug("ERROR: Map %s doesn't exist\n", map_name); + err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST; + goto out; + } + + *key_scan_pos += strlen(map_opt); + err = config_map_indices_range_check(term, map, map_name); + if (err) + goto out; + *key_scan_pos -= strlen(map_opt); + + for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) { + struct bpf_obj_config__map_func *func = + &bpf_obj_config__map_funcs[i]; + + if (strcmp(map_opt, func->config_opt) == 0) { + err = func->config_func(map, term, evlist); + goto out; + } + } + + pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); + err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; +out: + free(map_name); + if (!err) + key_scan_pos += strlen(map_opt); + return err; +} + +int bpf__config_obj(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *error_pos) +{ + int key_scan_pos = 0; + int err; + + if (!obj || !term || !term->config) + return -EINVAL; + + if (!prefixcmp(term->config, "map:")) { + key_scan_pos = sizeof("map:") - 1; + err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos); + goto out; + } + err = -BPF_LOADER_ERRNO__OBJCONF_OPT; +out: + if (error_pos) + *error_pos = key_scan_pos; + return err; + +} + +typedef int (*map_config_func_t)(const char *name, int map_fd, + struct bpf_map_def *pdef, + struct bpf_map_op *op, + void *pkey, void *arg); + +static int +foreach_key_array_all(map_config_func_t func, + void *arg, const char *name, + int map_fd, struct bpf_map_def *pdef, + struct bpf_map_op *op) +{ + unsigned int i; + int err; + + for (i = 0; i < pdef->max_entries; i++) { + err = func(name, map_fd, pdef, op, &i, arg); + if (err) { + pr_debug("ERROR: failed to insert value to %s[%u]\n", + name, i); + return err; + } + } + return 0; +} + +static int +foreach_key_array_ranges(map_config_func_t func, void *arg, + const char *name, int map_fd, + struct bpf_map_def *pdef, + struct bpf_map_op *op) +{ + unsigned int i, j; + int err; + + for (i = 0; i < op->k.array.nr_ranges; i++) { + unsigned int start = op->k.array.ranges[i].start; + size_t length = op->k.array.ranges[i].length; + + for (j = 0; j < length; j++) { + unsigned int idx = start + j; + + err = func(name, map_fd, pdef, op, &idx, arg); + if (err) { + pr_debug("ERROR: failed to insert value to %s[%u]\n", + name, idx); + return err; + } + } + } + return 0; +} + +static int +bpf_map_config_foreach_key(struct bpf_map *map, + map_config_func_t func, + void *arg) +{ + int err, map_fd; + const char *name; + struct bpf_map_op *op; + struct bpf_map_def def; + struct bpf_map_priv *priv; + + name = bpf_map__get_name(map); + + err = bpf_map__get_private(map, (void **)&priv); + if (err) { + pr_debug("ERROR: failed to get private from map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + if (!priv || list_empty(&priv->ops_list)) { + pr_debug("INFO: nothing to config for map %s\n", name); + return 0; + } + + err = bpf_map__get_def(map, &def); + if (err) { + pr_debug("ERROR: failed to get definition from map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + map_fd = bpf_map__get_fd(map); + if (map_fd < 0) { + pr_debug("ERROR: failed to get fd from map %s\n", name); + return map_fd; + } + + list_for_each_entry(op, &priv->ops_list, list) { + switch (def.type) { + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + switch (op->key_type) { + case BPF_MAP_KEY_ALL: + err = foreach_key_array_all(func, arg, name, + map_fd, &def, op); + break; + case BPF_MAP_KEY_RANGES: + err = foreach_key_array_ranges(func, arg, name, + map_fd, &def, + op); + break; + default: + pr_debug("ERROR: keytype for map '%s' invalid\n", + name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + if (err) + return err; + break; + default: + pr_debug("ERROR: type of '%s' incorrect\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; + } + } + + return 0; +} + +static int +apply_config_value_for_key(int map_fd, void *pkey, + size_t val_size, u64 val) +{ + int err = 0; + + switch (val_size) { + case 1: { + u8 _val = (u8)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 2: { + u16 _val = (u16)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 4: { + u32 _val = (u32)(val); + err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY); + break; + } + case 8: { + err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY); + break; + } + default: + pr_debug("ERROR: invalid value size\n"); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE; + } + if (err && errno) + err = -errno; + return err; +} + +static int +apply_config_evsel_for_key(const char *name, int map_fd, void *pkey, + struct perf_evsel *evsel) +{ + struct xyarray *xy = evsel->fd; + struct perf_event_attr *attr; + unsigned int key, events; + bool check_pass = false; + int *evt_fd; + int err; + + if (!xy) { + pr_debug("ERROR: evsel not ready for map %s\n", name); + return -BPF_LOADER_ERRNO__INTERNAL; + } + + if (xy->row_size / xy->entry_size != 1) { + pr_debug("ERROR: Dimension of target event is incorrect for map %s\n", + name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM; + } + + attr = &evsel->attr; + if (attr->inherit) { + pr_debug("ERROR: Can't put inherit event into map %s\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH; + } + + if (perf_evsel__is_bpf_output(evsel)) + check_pass = true; + if (attr->type == PERF_TYPE_RAW) + check_pass = true; + if (attr->type == PERF_TYPE_HARDWARE) + check_pass = true; + if (!check_pass) { + pr_debug("ERROR: Event type is wrong for map %s\n", name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE; + } + + events = xy->entries / (xy->row_size / xy->entry_size); + key = *((unsigned int *)pkey); + if (key >= events) { + pr_debug("ERROR: there is no event %d for map %s\n", + key, name); + return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE; + } + evt_fd = xyarray__entry(xy, key, 0); + err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY); + if (err && errno) + err = -errno; + return err; +} + +static int +apply_obj_config_map_for_key(const char *name, int map_fd, + struct bpf_map_def *pdef __maybe_unused, + struct bpf_map_op *op, + void *pkey, void *arg __maybe_unused) +{ + int err; + + switch (op->op_type) { + case BPF_MAP_OP_SET_VALUE: + err = apply_config_value_for_key(map_fd, pkey, + pdef->value_size, + op->v.value); + break; + case BPF_MAP_OP_SET_EVSEL: + err = apply_config_evsel_for_key(name, map_fd, pkey, + op->v.evsel); + break; + default: + pr_debug("ERROR: unknown value type for '%s'\n", name); + err = -BPF_LOADER_ERRNO__INTERNAL; + } + return err; +} + +static int +apply_obj_config_map(struct bpf_map *map) +{ + return bpf_map_config_foreach_key(map, + apply_obj_config_map_for_key, + NULL); +} + +static int +apply_obj_config_object(struct bpf_object *obj) +{ + struct bpf_map *map; + int err; + + bpf_map__for_each(map, obj) { + err = apply_obj_config_map(map); + if (err) + return err; + } + return 0; +} + +int bpf__apply_obj_config(void) +{ + struct bpf_object *obj, *tmp; + int err; + + bpf_object__for_each_safe(obj, tmp) { + err = apply_obj_config_object(obj); + if (err) + return err; + } + + return 0; +} + +#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) +#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c) +#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START) + +static const char *bpf_loader_strerror_table[NR_ERRNO] = { + [ERRCODE_OFFSET(CONFIG)] = "Invalid config string", + [ERRCODE_OFFSET(GROUP)] = "Invalid group name", + [ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string", + [ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error", + [ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet", + [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string", + [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue", + [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program", + [ERRCODE_OFFSET(PROLOGUEOOB)] = "Offset out of bound for prologue", + [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option", + [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')", + [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option", + [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist", + [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map", + [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type", + [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size", + [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size", + [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting", + [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event", + [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map", + [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large", +}; + +static int +bpf_loader_strerror(int err, char *buf, size_t size) +{ + char sbuf[STRERR_BUFSIZE]; + const char *msg; + + if (!buf || !size) + return -1; + + err = err > 0 ? err : -err; + + if (err >= __LIBBPF_ERRNO__START) + return libbpf_strerror(err, buf, size); + + if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) { + msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)]; + snprintf(buf, size, "%s", msg); + buf[size - 1] = '\0'; + return 0; + } + + if (err >= __BPF_LOADER_ERRNO__END) + snprintf(buf, size, "Unknown bpf loader error %d", err); + else + snprintf(buf, size, "%s", + strerror_r(err, sbuf, sizeof(sbuf))); + + buf[size - 1] = '\0'; + return -1; +} + #define bpf__strerror_head(err, buf, size) \ char sbuf[STRERR_BUFSIZE], *emsg;\ if (!size)\ return 0;\ if (err < 0)\ err = -err;\ - emsg = strerror_r(err, sbuf, sizeof(sbuf));\ + bpf_loader_strerror(err, sbuf, sizeof(sbuf));\ + emsg = sbuf;\ switch (err) {\ default:\ scnprintf(buf, size, "%s", emsg);\ @@ -330,23 +1501,92 @@ int bpf__foreach_tev(struct bpf_object *obj, }\ buf[size - 1] = '\0'; +int bpf__strerror_prepare_load(const char *filename, bool source, + int err, char *buf, size_t size) +{ + size_t n; + int ret; + + n = snprintf(buf, size, "Failed to load %s%s: ", + filename, source ? " from source" : ""); + if (n >= size) { + buf[size - 1] = '\0'; + return 0; + } + buf += n; + size -= n; + + ret = bpf_loader_strerror(err, buf, size); + buf[size - 1] = '\0'; + return ret; +} + int bpf__strerror_probe(struct bpf_object *obj __maybe_unused, int err, char *buf, size_t size) { bpf__strerror_head(err, buf, size); - bpf__strerror_entry(EEXIST, "Probe point exist. Try use 'perf probe -d \"*\"'"); - bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0\n"); - bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file\n"); + case BPF_LOADER_ERRNO__PROGCONF_TERM: { + scnprintf(buf, size, "%s (add -v to see detail)", emsg); + break; + } + bpf__strerror_entry(EEXIST, "Probe point exist. Try 'perf probe -d \"*\"' and set 'force=yes'"); + bpf__strerror_entry(EACCES, "You need to be root"); + bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0"); + bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file"); bpf__strerror_end(buf, size); return 0; } -int bpf__strerror_load(struct bpf_object *obj __maybe_unused, +int bpf__strerror_load(struct bpf_object *obj, int err, char *buf, size_t size) { bpf__strerror_head(err, buf, size); - bpf__strerror_entry(EINVAL, "%s: Are you root and runing a CONFIG_BPF_SYSCALL kernel?", - emsg) + case LIBBPF_ERRNO__KVER: { + unsigned int obj_kver = bpf_object__get_kversion(obj); + unsigned int real_kver; + + if (fetch_kernel_version(&real_kver, NULL, 0)) { + scnprintf(buf, size, "Unable to fetch kernel version"); + break; + } + + if (obj_kver != real_kver) { + scnprintf(buf, size, + "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")", + KVER_PARAM(obj_kver), + KVER_PARAM(real_kver)); + break; + } + + scnprintf(buf, size, "Failed to load program for unknown reason"); + break; + } + bpf__strerror_end(buf, size); + return 0; +} + +int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused, int err, + char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, + "Can't use this config term with this map type"); + bpf__strerror_end(buf, size); + return 0; +} + +int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) +{ + bpf__strerror_head(err, buf, size); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, + "Cannot set event to BPF map in multi-thread tracing"); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, + "%s (Hint: use -i to turn off inherit)", emsg); + bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, + "Can only put raw, hardware and BPF output event into a BPF map"); bpf__strerror_end(buf, size); return 0; } diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index ccd8d7fd79d3..be4311944e3d 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -8,10 +8,42 @@ #include <linux/compiler.h> #include <linux/err.h> #include <string.h> +#include <bpf/libbpf.h> #include "probe-event.h" +#include "evlist.h" #include "debug.h" +enum bpf_loader_errno { + __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, + /* Invalid config string */ + BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START, + BPF_LOADER_ERRNO__GROUP, /* Invalid group name */ + BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */ + BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */ + BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */ + BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */ + BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */ + BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */ + BPF_LOADER_ERRNO__PROLOGUEOOB, /* Offset out of bound for prologue */ + BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */ + BPF_LOADER_ERRNO__OBJCONF_CONF, /* Config value not set (lost '=')) */ + BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object map config option */ + BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map not exist */ + BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */ + BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */ + BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */ + BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */ + BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */ + BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */ + BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */ + BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */ + __BPF_LOADER_ERRNO__END, +}; + struct bpf_object; +struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev, @@ -19,6 +51,11 @@ typedef int (*bpf_prog_iter_callback_t)(struct probe_trace_event *tev, #ifdef HAVE_LIBBPF_SUPPORT struct bpf_object *bpf__prepare_load(const char *filename, bool source); +int bpf__strerror_prepare_load(const char *filename, bool source, + int err, char *buf, size_t size); + +struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, + const char *name); void bpf__clear(void); @@ -32,6 +69,16 @@ int bpf__strerror_load(struct bpf_object *obj, int err, char *buf, size_t size); int bpf__foreach_tev(struct bpf_object *obj, bpf_prog_iter_callback_t func, void *arg); + +int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term, + struct perf_evlist *evlist, int *error_pos); +int bpf__strerror_config_obj(struct bpf_object *obj, + struct parse_events_term *term, + struct perf_evlist *evlist, + int *error_pos, int err, char *buf, + size_t size); +int bpf__apply_obj_config(void); +int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); #else static inline struct bpf_object * bpf__prepare_load(const char *filename __maybe_unused, @@ -41,6 +88,13 @@ bpf__prepare_load(const char *filename __maybe_unused, return ERR_PTR(-ENOTSUP); } +static inline struct bpf_object * +bpf__prepare_load_buffer(void *obj_buf __maybe_unused, + size_t obj_buf_sz __maybe_unused) +{ + return ERR_PTR(-ENOTSUP); +} + static inline void bpf__clear(void) { } static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;} @@ -56,6 +110,21 @@ bpf__foreach_tev(struct bpf_object *obj __maybe_unused, } static inline int +bpf__config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused) +{ + return 0; +} + +static inline int +bpf__apply_obj_config(void) +{ + return 0; +} + +static inline int __bpf_strerror(char *buf, size_t size) { if (!size) @@ -67,6 +136,15 @@ __bpf_strerror(char *buf, size_t size) return 0; } +static inline +int bpf__strerror_prepare_load(const char *filename __maybe_unused, + bool source __maybe_unused, + int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} + static inline int bpf__strerror_probe(struct bpf_object *obj __maybe_unused, int err __maybe_unused, @@ -81,5 +159,23 @@ static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused, { return __bpf_strerror(buf, size); } + +static inline int +bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused, + struct parse_events_term *term __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int *error_pos __maybe_unused, + int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} + +static inline int +bpf__strerror_apply_obj_config(int err __maybe_unused, + char *buf, size_t size) +{ + return __bpf_strerror(buf, size); +} #endif #endif diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c new file mode 100644 index 000000000000..6cdbee119ceb --- /dev/null +++ b/tools/perf/util/bpf-prologue.c @@ -0,0 +1,455 @@ +/* + * bpf-prologue.c + * + * Copyright (C) 2015 He Kuang <hekuang@huawei.com> + * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com> + * Copyright (C) 2015 Huawei Inc. + */ + +#include <bpf/libbpf.h> +#include "perf.h" +#include "debug.h" +#include "bpf-loader.h" +#include "bpf-prologue.h" +#include "probe-finder.h" +#include <dwarf-regs.h> +#include <linux/filter.h> + +#define BPF_REG_SIZE 8 + +#define JMP_TO_ERROR_CODE -1 +#define JMP_TO_SUCCESS_CODE -2 +#define JMP_TO_USER_CODE -3 + +struct bpf_insn_pos { + struct bpf_insn *begin; + struct bpf_insn *end; + struct bpf_insn *pos; +}; + +static inline int +pos_get_cnt(struct bpf_insn_pos *pos) +{ + return pos->pos - pos->begin; +} + +static int +append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos) +{ + if (!pos->pos) + return -BPF_LOADER_ERRNO__PROLOGUE2BIG; + + if (pos->pos + 1 >= pos->end) { + pr_err("bpf prologue: prologue too long\n"); + pos->pos = NULL; + return -BPF_LOADER_ERRNO__PROLOGUE2BIG; + } + + *(pos->pos)++ = new_insn; + return 0; +} + +static int +check_pos(struct bpf_insn_pos *pos) +{ + if (!pos->pos || pos->pos >= pos->end) + return -BPF_LOADER_ERRNO__PROLOGUE2BIG; + return 0; +} + +/* Give it a shorter name */ +#define ins(i, p) append_insn((i), (p)) + +/* + * Give a register name (in 'reg'), generate instruction to + * load register into an eBPF register rd: + * 'ldd target_reg, offset(ctx_reg)', where: + * ctx_reg is pre initialized to pointer of 'struct pt_regs'. + */ +static int +gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg, + const char *reg, int target_reg) +{ + int offset = regs_query_register_offset(reg); + + if (offset < 0) { + pr_err("bpf: prologue: failed to get register %s\n", + reg); + return offset; + } + ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos); + + return check_pos(pos); +} + +/* + * Generate a BPF_FUNC_probe_read function call. + * + * src_base_addr_reg is a register holding base address, + * dst_addr_reg is a register holding dest address (on stack), + * result is: + * + * *[dst_addr_reg] = *([src_base_addr_reg] + offset) + * + * Arguments of BPF_FUNC_probe_read: + * ARG1: ptr to stack (dest) + * ARG2: size (8) + * ARG3: unsafe ptr (src) + */ +static int +gen_read_mem(struct bpf_insn_pos *pos, + int src_base_addr_reg, + int dst_addr_reg, + long offset) +{ + /* mov arg3, src_base_addr_reg */ + if (src_base_addr_reg != BPF_REG_ARG3) + ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos); + /* add arg3, #offset */ + if (offset) + ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos); + + /* mov arg2, #reg_size */ + ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos); + + /* mov arg1, dst_addr_reg */ + if (dst_addr_reg != BPF_REG_ARG1) + ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos); + + /* Call probe_read */ + ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos); + /* + * Error processing: if read fail, goto error code, + * will be relocated. Target should be the start of + * error processing code. + */ + ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE), + pos); + + return check_pos(pos); +} + +/* + * Each arg should be bare register. Fetch and save them into argument + * registers (r3 - r5). + * + * BPF_REG_1 should have been initialized with pointer to + * 'struct pt_regs'. + */ +static int +gen_prologue_fastpath(struct bpf_insn_pos *pos, + struct probe_trace_arg *args, int nargs) +{ + int i, err = 0; + + for (i = 0; i < nargs; i++) { + err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value, + BPF_PROLOGUE_START_ARG_REG + i); + if (err) + goto errout; + } + + return check_pos(pos); +errout: + return err; +} + +/* + * Slow path: + * At least one argument has the form of 'offset($rx)'. + * + * Following code first stores them into stack, then loads all of then + * to r2 - r5. + * Before final loading, the final result should be: + * + * low address + * BPF_REG_FP - 24 ARG3 + * BPF_REG_FP - 16 ARG2 + * BPF_REG_FP - 8 ARG1 + * BPF_REG_FP + * high address + * + * For each argument (described as: offn(...off2(off1(reg)))), + * generates following code: + * + * r7 <- fp + * r7 <- r7 - stack_offset // Ideal code should initialize r7 using + * // fp before generating args. However, + * // eBPF won't regard r7 as stack pointer + * // if it is generated by minus 8 from + * // another stack pointer except fp. + * // This is why we have to set r7 + * // to fp for each variable. + * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx() + * (r7) <- r3 // skip following instructions for bare reg + * r3 <- r3 + off1 . // skip if off1 == 0 + * r2 <- 8 \ + * r1 <- r7 |-> generated by gen_read_mem() + * call probe_read / + * jnei r0, 0, err ./ + * r3 <- (r7) + * r3 <- r3 + off2 . // skip if off2 == 0 + * r2 <- 8 \ // r2 may be broken by probe_read, so set again + * r1 <- r7 |-> generated by gen_read_mem() + * call probe_read / + * jnei r0, 0, err ./ + * ... + */ +static int +gen_prologue_slowpath(struct bpf_insn_pos *pos, + struct probe_trace_arg *args, int nargs) +{ + int err, i; + + for (i = 0; i < nargs; i++) { + struct probe_trace_arg *arg = &args[i]; + const char *reg = arg->value; + struct probe_trace_arg_ref *ref = NULL; + int stack_offset = (i + 1) * -8; + + pr_debug("prologue: fetch arg %d, base reg is %s\n", + i, reg); + + /* value of base register is stored into ARG3 */ + err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg, + BPF_REG_ARG3); + if (err) { + pr_err("prologue: failed to get offset of register %s\n", + reg); + goto errout; + } + + /* Make r7 the stack pointer. */ + ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos); + /* r7 += -8 */ + ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos); + /* + * Store r3 (base register) onto stack + * Ensure fp[offset] is set. + * fp is the only valid base register when storing + * into stack. We are not allowed to use r7 as base + * register here. + */ + ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3, + stack_offset), pos); + + ref = arg->ref; + while (ref) { + pr_debug("prologue: arg %d: offset %ld\n", + i, ref->offset); + err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7, + ref->offset); + if (err) { + pr_err("prologue: failed to generate probe_read function call\n"); + goto errout; + } + + ref = ref->next; + /* + * Load previous result into ARG3. Use + * BPF_REG_FP instead of r7 because verifier + * allows FP based addressing only. + */ + if (ref) + ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3, + BPF_REG_FP, stack_offset), pos); + } + } + + /* Final pass: read to registers */ + for (i = 0; i < nargs; i++) + ins(BPF_LDX_MEM(BPF_DW, BPF_PROLOGUE_START_ARG_REG + i, + BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos); + + ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos); + + return check_pos(pos); +errout: + return err; +} + +static int +prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code, + struct bpf_insn *success_code, struct bpf_insn *user_code) +{ + struct bpf_insn *insn; + + if (check_pos(pos)) + return -BPF_LOADER_ERRNO__PROLOGUE2BIG; + + for (insn = pos->begin; insn < pos->pos; insn++) { + struct bpf_insn *target; + u8 class = BPF_CLASS(insn->code); + u8 opcode; + + if (class != BPF_JMP) + continue; + opcode = BPF_OP(insn->code); + if (opcode == BPF_CALL) + continue; + + switch (insn->off) { + case JMP_TO_ERROR_CODE: + target = error_code; + break; + case JMP_TO_SUCCESS_CODE: + target = success_code; + break; + case JMP_TO_USER_CODE: + target = user_code; + break; + default: + pr_err("bpf prologue: internal error: relocation failed\n"); + return -BPF_LOADER_ERRNO__PROLOGUE; + } + + insn->off = target - (insn + 1); + } + return 0; +} + +int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, + struct bpf_insn *new_prog, size_t *new_cnt, + size_t cnt_space) +{ + struct bpf_insn *success_code = NULL; + struct bpf_insn *error_code = NULL; + struct bpf_insn *user_code = NULL; + struct bpf_insn_pos pos; + bool fastpath = true; + int err = 0, i; + + if (!new_prog || !new_cnt) + return -EINVAL; + + if (cnt_space > BPF_MAXINSNS) + cnt_space = BPF_MAXINSNS; + + pos.begin = new_prog; + pos.end = new_prog + cnt_space; + pos.pos = new_prog; + + if (!nargs) { + ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), + &pos); + + if (check_pos(&pos)) + goto errout; + + *new_cnt = pos_get_cnt(&pos); + return 0; + } + + if (nargs > BPF_PROLOGUE_MAX_ARGS) { + pr_warning("bpf: prologue: %d arguments are dropped\n", + nargs - BPF_PROLOGUE_MAX_ARGS); + nargs = BPF_PROLOGUE_MAX_ARGS; + } + + /* First pass: validation */ + for (i = 0; i < nargs; i++) { + struct probe_trace_arg_ref *ref = args[i].ref; + + if (args[i].value[0] == '@') { + /* TODO: fetch global variable */ + pr_err("bpf: prologue: global %s%+ld not support\n", + args[i].value, ref ? ref->offset : 0); + return -ENOTSUP; + } + + while (ref) { + /* fastpath is true if all args has ref == NULL */ + fastpath = false; + + /* + * Instruction encodes immediate value using + * s32, ref->offset is long. On systems which + * can't fill long in s32, refuse to process if + * ref->offset too large (or small). + */ +#ifdef __LP64__ +#define OFFSET_MAX ((1LL << 31) - 1) +#define OFFSET_MIN ((1LL << 31) * -1) + if (ref->offset > OFFSET_MAX || + ref->offset < OFFSET_MIN) { + pr_err("bpf: prologue: offset out of bound: %ld\n", + ref->offset); + return -BPF_LOADER_ERRNO__PROLOGUEOOB; + } +#endif + ref = ref->next; + } + } + pr_debug("prologue: pass validation\n"); + + if (fastpath) { + /* If all variables are registers... */ + pr_debug("prologue: fast path\n"); + err = gen_prologue_fastpath(&pos, args, nargs); + if (err) + goto errout; + } else { + pr_debug("prologue: slow path\n"); + + /* Initialization: move ctx to a callee saved register. */ + ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos); + + err = gen_prologue_slowpath(&pos, args, nargs); + if (err) + goto errout; + /* + * start of ERROR_CODE (only slow pass needs error code) + * mov r2 <- 1 // r2 is error number + * mov r3 <- 0 // r3, r4... should be touched or + * // verifier would complain + * mov r4 <- 0 + * ... + * goto usercode + */ + error_code = pos.pos; + ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1), + &pos); + + for (i = 0; i < nargs; i++) + ins(BPF_ALU64_IMM(BPF_MOV, + BPF_PROLOGUE_START_ARG_REG + i, + 0), + &pos); + ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE), + &pos); + } + + /* + * start of SUCCESS_CODE: + * mov r2 <- 0 + * goto usercode // skip + */ + success_code = pos.pos; + ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos); + + /* + * start of USER_CODE: + * Restore ctx to r1 + */ + user_code = pos.pos; + if (!fastpath) { + /* + * Only slow path needs restoring of ctx. In fast path, + * register are loaded directly from r1. + */ + ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos); + err = prologue_relocate(&pos, error_code, success_code, + user_code); + if (err) + goto errout; + } + + err = check_pos(&pos); + if (err) + goto errout; + + *new_cnt = pos_get_cnt(&pos); + return 0; +errout: + return err; +} diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h new file mode 100644 index 000000000000..d94cbea12899 --- /dev/null +++ b/tools/perf/util/bpf-prologue.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2015, He Kuang <hekuang@huawei.com> + * Copyright (C) 2015, Huawei Inc. + */ +#ifndef __BPF_PROLOGUE_H +#define __BPF_PROLOGUE_H + +#include <linux/compiler.h> +#include <linux/filter.h> +#include "probe-event.h" + +#define BPF_PROLOGUE_MAX_ARGS 3 +#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 +#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2 + +#ifdef HAVE_BPF_PROLOGUE +int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, + struct bpf_insn *new_prog, size_t *new_cnt, + size_t cnt_space); +#else +static inline int +bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, + int nargs __maybe_unused, + struct bpf_insn *new_prog __maybe_unused, + size_t *new_cnt, + size_t cnt_space __maybe_unused) +{ + if (!new_cnt) + return -EINVAL; + *new_cnt = 0; + return -ENOTSUP; +} +#endif +#endif /* __BPF_PROLOGUE_H */ diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index d909459fb54c..f1479eeef7da 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -76,6 +76,7 @@ struct perf_tool build_id__mark_dso_hit_ops = { .exit = perf_event__exit_del_thread, .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, + .ordered_events = true, }; int build_id__sprintf(const u8 *build_id, int len, char *bf) @@ -90,7 +91,7 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) bid += 2; } - return raw - build_id; + return (bid - bf) + 1; } int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) @@ -165,6 +166,50 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return build_id__filename(build_id_hex, bf, size); } +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size) +{ + char *id_name, *ch; + struct stat sb; + + id_name = dso__build_id_filename(dso, bf, size); + if (!id_name) + goto err; + if (access(id_name, F_OK)) + goto err; + if (lstat(id_name, &sb) == -1) + goto err; + if ((size_t)sb.st_size > size - 1) + goto err; + if (readlink(id_name, bf, size - 1) < 0) + goto err; + + bf[sb.st_size] = '\0'; + + /* + * link should be: + * ../../lib/modules/4.4.0-rc4/kernel/net/ipv4/netfilter/nf_nat_ipv4.ko/a09fe3eb3147dafa4e3b31dbd6257e4d696bdc92 + */ + ch = strrchr(bf, '/'); + if (!ch) + goto err; + if (ch - 3 < bf) + goto err; + + return strncmp(".ko", ch - 3, 3) == 0; +err: + /* + * If dso__build_id_filename work, get id_name again, + * because id_name points to bf and is broken. + */ + if (id_name) + id_name = dso__build_id_filename(dso, bf, size); + pr_err("Invalid build id: %s\n", id_name ? : + dso->long_name ? : + dso->short_name ? : + "[unknown]"); + return false; +} + #define dsos__for_each_with_build_id(pos, head) \ list_for_each_entry(pos, head, node) \ if (!pos->has_build_id) \ @@ -210,6 +255,7 @@ static int machine__write_buildid_table(struct machine *machine, int fd) dsos__for_each_with_build_id(pos, &machine->dsos.head) { const char *name; size_t name_len; + bool in_kernel = false; if (!pos->hit) continue; @@ -226,8 +272,11 @@ static int machine__write_buildid_table(struct machine *machine, int fd) name_len = pos->long_name_len + 1; } + in_kernel = pos->kernel || + is_kernel_module(name, + PERF_RECORD_MISC_CPUMODE_UNKNOWN); err = write_buildid(name, name_len, pos->build_id, machine->pid, - pos->kernel ? kmisc : umisc, fd); + in_kernel ? kmisc : umisc, fd); if (err) break; } diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 27a14a8a945b..64af3e20610d 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -16,6 +16,7 @@ int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); int filename__sprintf_build_id(const char *pathname, char *sbuild_id); char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); +bool dso__build_id_is_kmod(const struct dso *dso, char *bf, size_t size); int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index c861373aaed3..3ca453f0c51f 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -4,9 +4,12 @@ #include <stdbool.h> #include "util.h" #include "strbuf.h" +#include <subcmd/pager.h> #include "../perf.h" #include "../ui/ui.h" +#include <linux/string.h> + #define CMD_EXEC_PATH "--exec-path" #define CMD_PERF_DIR "--perf-dir=" #define CMD_WORK_TREE "--work-tree=" @@ -18,6 +21,9 @@ #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" +#define PERF_PAGER_ENVIRONMENT "PERF_PAGER" + +extern const char *config_exclusive_filename; typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); @@ -27,11 +33,7 @@ extern u64 perf_config_u64(const char *, const char *); extern int perf_config_bool(const char *, const char *); extern int config_error_nonbool(const char *); extern const char *perf_config_dirname(const char *, const char *); - -/* pager.c */ -extern void setup_pager(void); -extern int pager_in_use(void); -extern int pager_use_color; +extern const char *perf_etc_perfconfig(void); char *alias_lookup(const char *alias); int split_cmdline(char *cmdline, const char ***argv); @@ -71,9 +73,4 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -#ifndef __UCLIBC__ -/* Matches the libc/libbsd function attribute so we declare this unconditionally: */ -extern size_t strlcpy(char *dest, const char *src, size_t size); -#endif - #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 735ad48e1858..24b4bd0d7754 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value) callchain_param.mode = CHAIN_GRAPH_REL; return 0; } + if (!strncmp(value, "folded", strlen(value))) { + callchain_param.mode = CHAIN_FOLDED; + return 0; + } return -1; } @@ -79,6 +83,23 @@ static int parse_callchain_sort_key(const char *value) return -1; } +static int parse_callchain_value(const char *value) +{ + if (!strncmp(value, "percent", strlen(value))) { + callchain_param.value = CCVAL_PERCENT; + return 0; + } + if (!strncmp(value, "period", strlen(value))) { + callchain_param.value = CCVAL_PERIOD; + return 0; + } + if (!strncmp(value, "count", strlen(value))) { + callchain_param.value = CCVAL_COUNT; + return 0; + } + return -1; +} + static int __parse_callchain_report_opt(const char *arg, bool allow_record_opt) { @@ -102,7 +123,8 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt) if (!parse_callchain_mode(tok) || !parse_callchain_order(tok) || - !parse_callchain_sort_key(tok)) { + !parse_callchain_sort_key(tok) || + !parse_callchain_value(tok)) { /* parsing ok - move on to the next */ try_stack_size = false; goto next; @@ -218,6 +240,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, switch (mode) { case CHAIN_FLAT: + case CHAIN_FOLDED: if (rnode->hit < chain->hit) p = &(*p)->rb_left; else @@ -267,6 +290,7 @@ static void sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, u64 min_hit, struct callchain_param *param __maybe_unused) { + *rb_root = RB_ROOT; __sort_chain_flat(rb_root, &root->node, min_hit); } @@ -338,6 +362,7 @@ int callchain_register_param(struct callchain_param *param) param->sort = sort_chain_graph_rel; break; case CHAIN_FLAT: + case CHAIN_FOLDED: param->sort = sort_chain_flat; break; case CHAIN_NONE: @@ -363,6 +388,7 @@ create_child(struct callchain_node *parent, bool inherit_children) } new->parent = parent; INIT_LIST_HEAD(&new->val); + INIT_LIST_HEAD(&new->parent_val); if (inherit_children) { struct rb_node *n; @@ -390,7 +416,7 @@ create_child(struct callchain_node *parent, bool inherit_children) /* * Fill the node with callchain values */ -static void +static int fill_node(struct callchain_node *node, struct callchain_cursor *cursor) { struct callchain_cursor_node *cursor_node; @@ -407,7 +433,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call = zalloc(sizeof(*call)); if (!call) { perror("not enough memory for the code path tree"); - return; + return -1; } call->ip = cursor_node->ip; call->ms.sym = cursor_node->sym; @@ -417,6 +443,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) callchain_cursor_advance(cursor); cursor_node = callchain_cursor_current(cursor); } + return 0; } static struct callchain_node * @@ -427,23 +454,53 @@ add_child(struct callchain_node *parent, struct callchain_node *new; new = create_child(parent, false); - fill_node(new, cursor); + if (new == NULL) + return NULL; + + if (fill_node(new, cursor) < 0) { + struct callchain_list *call, *tmp; + + list_for_each_entry_safe(call, tmp, &new->val, list) { + list_del(&call->list); + free(call); + } + free(new); + return NULL; + } new->children_hit = 0; new->hit = period; + new->children_count = 0; + new->count = 1; return new; } -static s64 match_chain(struct callchain_cursor_node *node, - struct callchain_list *cnode) +enum match_result { + MATCH_ERROR = -1, + MATCH_EQ, + MATCH_LT, + MATCH_GT, +}; + +static enum match_result match_chain(struct callchain_cursor_node *node, + struct callchain_list *cnode) { struct symbol *sym = node->sym; + u64 left, right; if (cnode->ms.sym && sym && - callchain_param.key == CCKEY_FUNCTION) - return cnode->ms.sym->start - sym->start; - else - return cnode->ip - node->ip; + callchain_param.key == CCKEY_FUNCTION) { + left = cnode->ms.sym->start; + right = sym->start; + } else { + left = cnode->ip; + right = node->ip; + } + + if (left == right) + return MATCH_EQ; + + return left > right ? MATCH_GT : MATCH_LT; } /* @@ -451,7 +508,7 @@ static s64 match_chain(struct callchain_cursor_node *node, * give a part of its callchain to the created child. * Then create another child to host the given callchain of new branch */ -static void +static int split_add_child(struct callchain_node *parent, struct callchain_cursor *cursor, struct callchain_list *to_split, @@ -463,6 +520,8 @@ split_add_child(struct callchain_node *parent, /* split */ new = create_child(parent, true); + if (new == NULL) + return -1; /* split the callchain and move a part to the new child */ old_tail = parent->val.prev; @@ -478,6 +537,9 @@ split_add_child(struct callchain_node *parent, parent->children_hit = callchain_cumul_hits(new); new->val_nr = parent->val_nr - idx_local; parent->val_nr = idx_local; + new->count = parent->count; + new->children_count = parent->children_count; + parent->children_count = callchain_cumul_counts(new); /* create a new child for the new branch if any */ if (idx_total < cursor->nr) { @@ -488,9 +550,13 @@ split_add_child(struct callchain_node *parent, parent->hit = 0; parent->children_hit += period; + parent->count = 0; + parent->children_count += 1; node = callchain_cursor_current(cursor); new = add_child(parent, cursor, period); + if (new == NULL) + return -1; /* * This is second child since we moved parent's children @@ -501,7 +567,7 @@ split_add_child(struct callchain_node *parent, cnode = list_first_entry(&first->val, struct callchain_list, list); - if (match_chain(node, cnode) < 0) + if (match_chain(node, cnode) == MATCH_LT) pp = &p->rb_left; else pp = &p->rb_right; @@ -510,15 +576,17 @@ split_add_child(struct callchain_node *parent, rb_insert_color(&new->rb_node_in, &parent->rb_root_in); } else { parent->hit = period; + parent->count = 1; } + return 0; } -static int +static enum match_result append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period); -static void +static int append_chain_children(struct callchain_node *root, struct callchain_cursor *cursor, u64 period) @@ -530,35 +598,42 @@ append_chain_children(struct callchain_node *root, node = callchain_cursor_current(cursor); if (!node) - return; + return -1; /* lookup in childrens */ while (*p) { - s64 ret; + enum match_result ret; parent = *p; rnode = rb_entry(parent, struct callchain_node, rb_node_in); /* If at least first entry matches, rely to children */ ret = append_chain(rnode, cursor, period); - if (ret == 0) + if (ret == MATCH_EQ) goto inc_children_hit; + if (ret == MATCH_ERROR) + return -1; - if (ret < 0) + if (ret == MATCH_LT) p = &parent->rb_left; else p = &parent->rb_right; } /* nothing in children, add to the current node */ rnode = add_child(root, cursor, period); + if (rnode == NULL) + return -1; + rb_link_node(&rnode->rb_node_in, parent, p); rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); inc_children_hit: root->children_hit += period; + root->children_count++; + return 0; } -static int +static enum match_result append_chain(struct callchain_node *root, struct callchain_cursor *cursor, u64 period) @@ -567,7 +642,7 @@ append_chain(struct callchain_node *root, u64 start = cursor->pos; bool found = false; u64 matches; - int cmp = 0; + enum match_result cmp = MATCH_ERROR; /* * Lookup in the current node @@ -583,7 +658,7 @@ append_chain(struct callchain_node *root, break; cmp = match_chain(node, cnode); - if (cmp) + if (cmp != MATCH_EQ) break; found = true; @@ -593,7 +668,7 @@ append_chain(struct callchain_node *root, /* matches not, relay no the parent */ if (!found) { - WARN_ONCE(!cmp, "Chain comparison error\n"); + WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n"); return cmp; } @@ -601,20 +676,25 @@ append_chain(struct callchain_node *root, /* we match only a part of the node. Split it and add the new chain */ if (matches < root->val_nr) { - split_add_child(root, cursor, cnode, start, matches, period); - return 0; + if (split_add_child(root, cursor, cnode, start, matches, + period) < 0) + return MATCH_ERROR; + + return MATCH_EQ; } /* we match 100% of the path, increment the hit */ if (matches == root->val_nr && cursor->pos == cursor->nr) { root->hit += period; - return 0; + root->count++; + return MATCH_EQ; } /* We match the node and still have a part remaining */ - append_chain_children(root, cursor, period); + if (append_chain_children(root, cursor, period) < 0) + return MATCH_ERROR; - return 0; + return MATCH_EQ; } int callchain_append(struct callchain_root *root, @@ -626,7 +706,8 @@ int callchain_append(struct callchain_root *root, callchain_cursor_commit(cursor); - append_chain_children(&root->node, cursor, period); + if (append_chain_children(&root->node, cursor, period) < 0) + return -1; if (cursor->nr > root->max_depth) root->max_depth = cursor->nr; @@ -654,7 +735,8 @@ merge_chain_branch(struct callchain_cursor *cursor, if (src->hit) { callchain_cursor_commit(cursor); - append_chain_children(dst, cursor, src->hit); + if (append_chain_children(dst, cursor, src->hit) < 0) + return -1; } n = rb_first(&src->rb_root_in); @@ -799,12 +881,72 @@ char *callchain_list__sym_name(struct callchain_list *cl, return bf; } +char *callchain_node__scnprintf_value(struct callchain_node *node, + char *bf, size_t bfsize, u64 total) +{ + double percent = 0.0; + u64 period = callchain_cumul_hits(node); + unsigned count = callchain_cumul_counts(node); + + if (callchain_param.mode == CHAIN_FOLDED) { + period = node->hit; + count = node->count; + } + + switch (callchain_param.value) { + case CCVAL_PERIOD: + scnprintf(bf, bfsize, "%"PRIu64, period); + break; + case CCVAL_COUNT: + scnprintf(bf, bfsize, "%u", count); + break; + case CCVAL_PERCENT: + default: + if (total) + percent = period * 100.0 / total; + scnprintf(bf, bfsize, "%.2f%%", percent); + break; + } + return bf; +} + +int callchain_node__fprintf_value(struct callchain_node *node, + FILE *fp, u64 total) +{ + double percent = 0.0; + u64 period = callchain_cumul_hits(node); + unsigned count = callchain_cumul_counts(node); + + if (callchain_param.mode == CHAIN_FOLDED) { + period = node->hit; + count = node->count; + } + + switch (callchain_param.value) { + case CCVAL_PERIOD: + return fprintf(fp, "%"PRIu64, period); + case CCVAL_COUNT: + return fprintf(fp, "%u", count); + case CCVAL_PERCENT: + default: + if (total) + percent = period * 100.0 / total; + return percent_color_fprintf(fp, "%.2f%%", percent); + } + return 0; +} + static void free_callchain_node(struct callchain_node *node) { struct callchain_list *list, *tmp; struct callchain_node *child; struct rb_node *n; + list_for_each_entry_safe(list, tmp, &node->parent_val, list) { + list_del(&list->list); + free(list); + } + list_for_each_entry_safe(list, tmp, &node->val, list) { list_del(&list->list); free(list); @@ -828,3 +970,69 @@ void free_callchain(struct callchain_root *root) free_callchain_node(&root->node); } + +static u64 decay_callchain_node(struct callchain_node *node) +{ + struct callchain_node *child; + struct rb_node *n; + u64 child_hits = 0; + + n = rb_first(&node->rb_root_in); + while (n) { + child = container_of(n, struct callchain_node, rb_node_in); + + child_hits += decay_callchain_node(child); + n = rb_next(n); + } + + node->hit = (node->hit * 7) / 8; + node->children_hit = child_hits; + + return node->hit; +} + +void decay_callchain(struct callchain_root *root) +{ + if (!symbol_conf.use_callchain) + return; + + decay_callchain_node(&root->node); +} + +int callchain_node__make_parent_list(struct callchain_node *node) +{ + struct callchain_node *parent = node->parent; + struct callchain_list *chain, *new; + LIST_HEAD(head); + + while (parent) { + list_for_each_entry_reverse(chain, &parent->val, list) { + new = malloc(sizeof(*new)); + if (new == NULL) + goto out; + *new = *chain; + new->has_children = false; + list_add_tail(&new->list, &head); + } + parent = parent->parent; + } + + list_for_each_entry_safe_reverse(chain, new, &head, list) + list_move_tail(&chain->list, &node->parent_val); + + if (!list_empty(&node->parent_val)) { + chain = list_first_entry(&node->parent_val, struct callchain_list, list); + chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node); + + chain = list_first_entry(&node->val, struct callchain_list, list); + chain->has_children = false; + } + return 0; + +out: + list_for_each_entry_safe(chain, new, &head, list) { + list_del(&chain->list); + free(chain); + } + return -ENOMEM; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fce8161e54db..18dd22269764 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -24,12 +24,13 @@ #define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP #define CALLCHAIN_REPORT_HELP \ - HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \ + HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \ HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \ HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \ HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \ HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \ - HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" + HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \ + HELP_PAD "value:\t\tcall graph value (percent|period|count)\n" enum perf_call_graph_mode { CALLCHAIN_NONE, @@ -43,7 +44,8 @@ enum chain_mode { CHAIN_NONE, CHAIN_FLAT, CHAIN_GRAPH_ABS, - CHAIN_GRAPH_REL + CHAIN_GRAPH_REL, + CHAIN_FOLDED, }; enum chain_order { @@ -54,11 +56,14 @@ enum chain_order { struct callchain_node { struct callchain_node *parent; struct list_head val; + struct list_head parent_val; struct rb_node rb_node_in; /* to insert nodes in an rbtree */ struct rb_node rb_node; /* to sort nodes in an output tree */ struct rb_root rb_root_in; /* input tree of children */ struct rb_root rb_root; /* sorted output tree of children */ unsigned int val_nr; + unsigned int count; + unsigned int children_count; u64 hit; u64 children_hit; }; @@ -78,6 +83,12 @@ enum chain_key { CCKEY_ADDRESS }; +enum chain_value { + CCVAL_PERCENT, + CCVAL_PERIOD, + CCVAL_COUNT, +}; + struct callchain_param { bool enabled; enum perf_call_graph_mode record_mode; @@ -90,6 +101,7 @@ struct callchain_param { bool order_set; enum chain_key key; bool branch_callstack; + enum chain_value value; }; extern struct callchain_param callchain_param; @@ -131,6 +143,7 @@ extern __thread struct callchain_cursor callchain_cursor; static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.val); + INIT_LIST_HEAD(&root->node.parent_val); root->node.parent = NULL; root->node.hit = 0; @@ -144,6 +157,11 @@ static inline u64 callchain_cumul_hits(struct callchain_node *node) return node->hit + node->children_hit; } +static inline unsigned callchain_cumul_counts(struct callchain_node *node) +{ + return node->count + node->children_count; +} + int callchain_register_param(struct callchain_param *param); int callchain_append(struct callchain_root *root, struct callchain_cursor *cursor, @@ -229,7 +247,13 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused, char *callchain_list__sym_name(struct callchain_list *cl, char *bf, size_t bfsize, bool show_dso); +char *callchain_node__scnprintf_value(struct callchain_node *node, + char *bf, size_t bfsize, u64 total); +int callchain_node__fprintf_value(struct callchain_node *node, + FILE *fp, u64 total); void free_callchain(struct callchain_root *root); +void decay_callchain(struct callchain_root *root); +int callchain_node__make_parent_list(struct callchain_node *node); #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 32e12ecfe9c5..90aa1b46b2e5 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -1,6 +1,6 @@ #include "util.h" #include "../perf.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include "evsel.h" #include "cgroup.h" #include "evlist.h" diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 9b9565416f90..43e84aa27e4a 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -24,7 +24,7 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) auto_color: if (stdout_is_tty < 0) stdout_is_tty = isatty(1); - if (stdout_is_tty || (pager_in_use() && pager_use_color)) { + if (stdout_is_tty || pager_in_use()) { char *term = getenv("TERM"); if (term && strcmp(term, "dumb")) return 1; @@ -32,14 +32,15 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) return 0; } -int perf_color_default_config(const char *var, const char *value, void *cb) +int perf_color_default_config(const char *var, const char *value, + void *cb __maybe_unused) { if (!strcmp(var, "color.ui")) { perf_use_color_default = perf_config_colorbool(var, value, -1); return 0; } - return perf_default_config(var, value, cb); + return 0; } static int __color_vsnprintf(char *bf, size_t size, const char *color, diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 2e452ac1353d..4e727635476e 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -10,7 +10,7 @@ */ #include "util.h" #include "cache.h" -#include "exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "util/hist.h" /* perf_hist_config */ #include "util/llvm-utils.h" /* perf_llvm_config */ @@ -26,7 +26,7 @@ static const char *config_file_name; static int config_linenr; static int config_file_eof; -static const char *config_exclusive_filename; +const char *config_exclusive_filename; static int get_next_char(void) { @@ -434,7 +434,7 @@ static int perf_config_from_file(config_fn_t fn, const char *filename, void *dat return ret; } -static const char *perf_etc_perfconfig(void) +const char *perf_etc_perfconfig(void) { static const char *system_wide; if (!system_wide) diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 10af1e7524fb..9bcf2bed3a6d 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -5,8 +5,13 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> +#include <linux/bitmap.h> #include "asm/bug.h" +static int max_cpu_num; +static int max_node_num; +static int *cpunode_map; + static struct cpu_map *cpu_map__default_new(void) { struct cpu_map *cpus; @@ -179,6 +184,56 @@ out: return cpus; } +static struct cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus) +{ + struct cpu_map *map; + + map = cpu_map__empty_new(cpus->nr); + if (map) { + unsigned i; + + for (i = 0; i < cpus->nr; i++) { + /* + * Special treatment for -1, which is not real cpu number, + * and we need to use (int) -1 to initialize map[i], + * otherwise it would become 65535. + */ + if (cpus->cpu[i] == (u16) -1) + map->map[i] = -1; + else + map->map[i] = (int) cpus->cpu[i]; + } + } + + return map; +} + +static struct cpu_map *cpu_map__from_mask(struct cpu_map_mask *mask) +{ + struct cpu_map *map; + int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE; + + nr = bitmap_weight(mask->mask, nbits); + + map = cpu_map__empty_new(nr); + if (map) { + int cpu, i = 0; + + for_each_set_bit(cpu, mask->mask, nbits) + map->map[i++] = cpu; + } + return map; + +} + +struct cpu_map *cpu_map__new_data(struct cpu_map_data *data) +{ + if (data->type == PERF_CPU_MAP__CPUS) + return cpu_map__from_entries((struct cpu_map_entries *)data->data); + else + return cpu_map__from_mask((struct cpu_map_mask *)data->data); +} + size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp) { int i; @@ -435,6 +490,32 @@ out: pr_err("Failed to read max nodes, using default of %d\n", max_node_num); } +int cpu__max_node(void) +{ + if (unlikely(!max_node_num)) + set_max_node_num(); + + return max_node_num; +} + +int cpu__max_cpu(void) +{ + if (unlikely(!max_cpu_num)) + set_max_cpu_num(); + + return max_cpu_num; +} + +int cpu__get_node(int cpu) +{ + if (unlikely(cpunode_map == NULL)) { + pr_debug("cpu_map not initialized\n"); + return -1; + } + + return cpunode_map[cpu]; +} + static int init_cpunode_map(void) { int i; diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 85f7772457fa..81a2562aaa2b 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -17,6 +17,7 @@ struct cpu_map { struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__empty_new(int nr); struct cpu_map *cpu_map__dummy_new(void); +struct cpu_map *cpu_map__new_data(struct cpu_map_data *data); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket_id(int cpu); @@ -56,37 +57,11 @@ static inline bool cpu_map__empty(const struct cpu_map *map) return map ? map->map[0] == -1 : true; } -int max_cpu_num; -int max_node_num; -int *cpunode_map; - int cpu__setup_cpunode_map(void); -static inline int cpu__max_node(void) -{ - if (unlikely(!max_node_num)) - pr_debug("cpu_map not initialized\n"); - - return max_node_num; -} - -static inline int cpu__max_cpu(void) -{ - if (unlikely(!max_cpu_num)) - pr_debug("cpu_map not initialized\n"); - - return max_cpu_num; -} - -static inline int cpu__get_node(int cpu) -{ - if (unlikely(cpunode_map == NULL)) { - pr_debug("cpu_map not initialized\n"); - return -1; - } - - return cpunode_map[cpu]; -} +int cpu__max_node(void); +int cpu__max_cpu(void); +int cpu__get_node(int cpu); int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, int (*f)(struct cpu_map *map, int cpu, void *data), diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index aada3ac5e891..d4a5a21c2a7e 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c @@ -32,8 +32,17 @@ unsigned char sane_ctype[256] = { const char *graph_line = "_____________________________________________________________________" + "_____________________________________________________________________" "_____________________________________________________________________"; const char *graph_dotted_line = "---------------------------------------------------------------------" "---------------------------------------------------------------------" "---------------------------------------------------------------------"; +const char *spaces = + " " + " " + " "; +const char *dots = + "....................................................................." + "....................................................................." + "....................................................................."; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 5bfc1198ab46..811af89ce0bb 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -63,6 +63,7 @@ struct ctf_writer { struct bt_ctf_field_type *s32; struct bt_ctf_field_type *u32; struct bt_ctf_field_type *string; + struct bt_ctf_field_type *u32_hex; struct bt_ctf_field_type *u64_hex; }; struct bt_ctf_field_type *array[6]; @@ -351,6 +352,84 @@ static int add_tracepoint_values(struct ctf_writer *cw, return ret; } +static int +add_bpf_output_values(struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample) +{ + struct bt_ctf_field_type *len_type, *seq_type; + struct bt_ctf_field *len_field, *seq_field; + unsigned int raw_size = sample->raw_size; + unsigned int nr_elements = raw_size / sizeof(u32); + unsigned int i; + int ret; + + if (nr_elements * sizeof(u32) != raw_size) + pr_warning("Incorrect raw_size (%u) in bpf output event, skip %lu bytes\n", + raw_size, nr_elements * sizeof(u32) - raw_size); + + len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len"); + len_field = bt_ctf_field_create(len_type); + if (!len_field) { + pr_err("failed to create 'raw_len' for bpf output event\n"); + ret = -1; + goto put_len_type; + } + + ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements); + if (ret) { + pr_err("failed to set field value for raw_len\n"); + goto put_len_field; + } + ret = bt_ctf_event_set_payload(event, "raw_len", len_field); + if (ret) { + pr_err("failed to set payload to raw_len\n"); + goto put_len_field; + } + + seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data"); + seq_field = bt_ctf_field_create(seq_type); + if (!seq_field) { + pr_err("failed to create 'raw_data' for bpf output event\n"); + ret = -1; + goto put_seq_type; + } + + ret = bt_ctf_field_sequence_set_length(seq_field, len_field); + if (ret) { + pr_err("failed to set length of 'raw_data'\n"); + goto put_seq_field; + } + + for (i = 0; i < nr_elements; i++) { + struct bt_ctf_field *elem_field = + bt_ctf_field_sequence_get_field(seq_field, i); + + ret = bt_ctf_field_unsigned_integer_set_value(elem_field, + ((u32 *)(sample->raw_data))[i]); + + bt_ctf_field_put(elem_field); + if (ret) { + pr_err("failed to set raw_data[%d]\n", i); + goto put_seq_field; + } + } + + ret = bt_ctf_event_set_payload(event, "raw_data", seq_field); + if (ret) + pr_err("failed to set payload for raw_data\n"); + +put_seq_field: + bt_ctf_field_put(seq_field); +put_seq_type: + bt_ctf_field_type_put(seq_type); +put_len_field: + bt_ctf_field_put(len_field); +put_len_type: + bt_ctf_field_type_put(len_type); + return ret; +} + static int add_generic_values(struct ctf_writer *cw, struct bt_ctf_event *event, struct perf_evsel *evsel, @@ -596,6 +675,12 @@ static int process_sample_event(struct perf_tool *tool, return -1; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_values(event_class, event, sample); + if (ret) + return -1; + } + cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); if (cs) { if (is_flush_needed(cs)) @@ -743,6 +828,25 @@ static int add_tracepoint_types(struct ctf_writer *cw, return ret; } +static int add_bpf_output_types(struct ctf_writer *cw, + struct bt_ctf_event_class *class) +{ + struct bt_ctf_field_type *len_type = cw->data.u32; + struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex; + struct bt_ctf_field_type *seq_type; + int ret; + + ret = bt_ctf_event_class_add_field(class, len_type, "raw_len"); + if (ret) + return ret; + + seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len"); + if (!seq_type) + return -1; + + return bt_ctf_event_class_add_field(class, seq_type, "raw_data"); +} + static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *event_class) { @@ -754,7 +858,8 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, * ctf event header * PERF_SAMPLE_READ - TODO * PERF_SAMPLE_CALLCHAIN - TODO - * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_RAW - tracepoint fields and BPF output + * are handled separately * PERF_SAMPLE_BRANCH_STACK - TODO * PERF_SAMPLE_REGS_USER - TODO * PERF_SAMPLE_STACK_USER - TODO @@ -823,6 +928,12 @@ static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel) goto err; } + if (perf_evsel__is_bpf_output(evsel)) { + ret = add_bpf_output_types(cw, event_class); + if (ret) + goto err; + } + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); if (ret) { pr("Failed to add event class into stream.\n"); @@ -857,6 +968,23 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session) return 0; } +static void cleanup_events(struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + struct evsel_priv *priv; + + priv = evsel->priv; + bt_ctf_event_class_put(priv->event_class); + zfree(&evsel->priv); + } + + perf_evlist__delete(evlist); + session->evlist = NULL; +} + static int setup_streams(struct ctf_writer *cw, struct perf_session *session) { struct ctf_stream **stream; @@ -952,6 +1080,12 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) goto err; +#if __BYTE_ORDER == __BIG_ENDIAN + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN); +#else + bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN); +#endif + pr2("Created type: INTEGER %d-bit %ssigned %s\n", size, sign ? "un" : "", hex ? "hex" : ""); return type; @@ -982,6 +1116,7 @@ do { \ CREATE_INT_TYPE(cw->data.u64, 64, false, false); CREATE_INT_TYPE(cw->data.s32, 32, true, false); CREATE_INT_TYPE(cw->data.u32, 32, false, false); + CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true); CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true); cw->data.string = bt_ctf_field_type_string_create(); @@ -1098,7 +1233,7 @@ static int convert__config(const char *var, const char *value, void *cb) return 0; } - return perf_default_config(var, value, cb); + return 0; } int bt_convert__perf2ctf(const char *input, const char *path, bool force) @@ -1169,6 +1304,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force) (double) c.events_size / 1024.0 / 1024.0, c.events_count); + cleanup_events(session); perf_session__delete(session); ctf_writer__cleanup(cw); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 86d9c7302598..8c4212abd19b 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -5,6 +5,7 @@ #include <string.h> #include <stdarg.h> #include <stdio.h> +#include <api/debug.h> #include "cache.h" #include "color.h" @@ -22,7 +23,7 @@ int debug_ordered_events; static int redirect_to_stderr; int debug_data_convert; -static int _eprintf(int level, int var, const char *fmt, va_list args) +int veprintf(int level, int var, const char *fmt, va_list args) { int ret = 0; @@ -36,24 +37,19 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) return ret; } -int veprintf(int level, int var, const char *fmt, va_list args) -{ - return _eprintf(level, var, fmt, args); -} - int eprintf(int level, int var, const char *fmt, ...) { va_list args; int ret; va_start(args, fmt); - ret = _eprintf(level, var, fmt, args); + ret = veprintf(level, var, fmt, args); va_end(args); return ret; } -static int __eprintf_time(u64 t, const char *fmt, va_list args) +static int veprintf_time(u64 t, const char *fmt, va_list args) { int ret = 0; u64 secs, usecs, nsecs = t; @@ -75,7 +71,7 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) if (var >= level) { va_start(args, fmt); - ret = __eprintf_time(t, fmt, args); + ret = veprintf_time(t, fmt, args); va_end(args); } @@ -91,7 +87,7 @@ void pr_stat(const char *fmt, ...) va_list args; va_start(args, fmt); - _eprintf(1, verbose, fmt, args); + veprintf(1, verbose, fmt, args); va_end(args); eprintf(1, verbose, "\n"); } @@ -110,40 +106,61 @@ int dump_printf(const char *fmt, ...) return ret; } +static void trace_event_printer(enum binary_printer_ops op, + unsigned int val, void *extra) +{ + const char *color = PERF_COLOR_BLUE; + union perf_event *event = (union perf_event *)extra; + unsigned char ch = (unsigned char)val; + + switch (op) { + case BINARY_PRINT_DATA_BEGIN: + printf("."); + color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", + event->header.size); + break; + case BINARY_PRINT_LINE_BEGIN: + printf("."); + break; + case BINARY_PRINT_ADDR: + color_fprintf(stdout, color, " %04x: ", val); + break; + case BINARY_PRINT_NUM_DATA: + color_fprintf(stdout, color, " %02x", val); + break; + case BINARY_PRINT_NUM_PAD: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_SEP: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_CHAR_DATA: + color_fprintf(stdout, color, "%c", + isprint(ch) ? ch : '.'); + break; + case BINARY_PRINT_CHAR_PAD: + color_fprintf(stdout, color, " "); + break; + case BINARY_PRINT_LINE_END: + color_fprintf(stdout, color, "\n"); + break; + case BINARY_PRINT_DATA_END: + printf("\n"); + break; + default: + break; + } +} + void trace_event(union perf_event *event) { unsigned char *raw_event = (void *)event; - const char *color = PERF_COLOR_BLUE; - int i, j; if (!dump_trace) return; - printf("."); - color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n", - event->header.size); - - for (i = 0; i < event->header.size; i++) { - if ((i & 15) == 0) { - printf("."); - color_fprintf(stdout, color, " %04x: ", i); - } - - color_fprintf(stdout, color, " %02x", raw_event[i]); - - if (((i & 15) == 15) || i == event->header.size-1) { - color_fprintf(stdout, color, " "); - for (j = 0; j < 15-(i & 15); j++) - color_fprintf(stdout, color, " "); - for (j = i & ~15; j <= i; j++) { - color_fprintf(stdout, color, "%c", - isprint(raw_event[j]) ? - raw_event[j] : '.'); - } - color_fprintf(stdout, color, "\n"); - } - } - printf(".\n"); + print_binary(raw_event, event->header.size, 16, + trace_event_printer, event); } static struct debug_variable { @@ -192,3 +209,23 @@ int perf_debug_option(const char *str) free(s); return 0; } + +#define DEBUG_WRAPPER(__n, __l) \ +static int pr_ ## __n ## _wrapper(const char *fmt, ...) \ +{ \ + va_list args; \ + int ret; \ + \ + va_start(args, fmt); \ + ret = veprintf(__l, verbose, fmt, args); \ + va_end(args); \ + return ret; \ +} + +DEBUG_WRAPPER(warning, 0); +DEBUG_WRAPPER(debug, 1); + +void perf_debug_setup(void) +{ + libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper); +} diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index 8b9a088c32ab..14bafda79eda 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -53,5 +53,6 @@ int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__( int veprintf(int level, int var, const char *fmt, va_list args); int perf_debug_option(const char *str); +void perf_debug_setup(void); #endif /* __PERF_DEBUG_H */ diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c new file mode 100644 index 000000000000..3e6062ab2cdd --- /dev/null +++ b/tools/perf/util/demangle-java.c @@ -0,0 +1,199 @@ +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include "util.h" +#include "debug.h" +#include "symbol.h" + +#include "demangle-java.h" + +enum { + MODE_PREFIX = 0, + MODE_CLASS = 1, + MODE_FUNC = 2, + MODE_TYPE = 3, + MODE_CTYPE = 3, /* class arg */ +}; + +#define BASE_ENT(c, n) [c - 'A']=n +static const char *base_types['Z' - 'A' + 1] = { + BASE_ENT('B', "byte" ), + BASE_ENT('C', "char" ), + BASE_ENT('D', "double" ), + BASE_ENT('F', "float" ), + BASE_ENT('I', "int" ), + BASE_ENT('J', "long" ), + BASE_ENT('S', "short" ), + BASE_ENT('Z', "bool" ), +}; + +/* + * demangle Java symbol between str and end positions and stores + * up to maxlen characters into buf. The parser starts in mode. + * + * Use MODE_PREFIX to process entire prototype till end position + * Use MODE_TYPE to process return type if str starts on return type char + * + * Return: + * success: buf + * error : NULL + */ +static char * +__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode) +{ + int rlen = 0; + int array = 0; + int narg = 0; + const char *q; + + if (!end) + end = str + strlen(str); + + for (q = str; q != end; q++) { + + if (rlen == (maxlen - 1)) + break; + + switch (*q) { + case 'L': + if (mode == MODE_PREFIX || mode == MODE_CTYPE) { + if (mode == MODE_CTYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + narg++; + } + rlen += scnprintf(buf + rlen, maxlen - rlen, "class "); + if (mode == MODE_PREFIX) + mode = MODE_CLASS; + } else + buf[rlen++] = *q; + break; + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': + if (mode == MODE_TYPE) { + if (narg) + rlen += scnprintf(buf + rlen, maxlen - rlen, ", "); + rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + narg++; + } else + buf[rlen++] = *q; + break; + case 'V': + if (mode == MODE_TYPE) { + rlen += scnprintf(buf + rlen, maxlen - rlen, "void"); + while (array--) + rlen += scnprintf(buf + rlen, maxlen - rlen, "[]"); + array = 0; + } else + buf[rlen++] = *q; + break; + case '[': + if (mode != MODE_TYPE) + goto error; + array++; + break; + case '(': + if (mode != MODE_FUNC) + goto error; + buf[rlen++] = *q; + mode = MODE_TYPE; + break; + case ')': + if (mode != MODE_TYPE) + goto error; + buf[rlen++] = *q; + narg = 0; + break; + case ';': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + /* safe because at least one other char to process */ + if (isalpha(*(q + 1))) + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + if (mode == MODE_CLASS) + mode = MODE_FUNC; + else if (mode == MODE_CTYPE) + mode = MODE_TYPE; + break; + case '/': + if (mode != MODE_CLASS && mode != MODE_CTYPE) + goto error; + rlen += scnprintf(buf + rlen, maxlen - rlen, "."); + break; + default : + buf[rlen++] = *q; + } + } + buf[rlen] = '\0'; + return buf; +error: + return NULL; +} + +/* + * Demangle Java function signature (openJDK, not GCJ) + * input: + * str: string to parse. String is not modified + * flags: comobination of JAVA_DEMANGLE_* flags to modify demangling + * return: + * if input can be demangled, then a newly allocated string is returned. + * if input cannot be demangled, then NULL is returned + * + * Note: caller is responsible for freeing demangled string + */ +char * +java_demangle_sym(const char *str, int flags) +{ + char *buf, *ptr; + char *p; + size_t len, l1 = 0; + + if (!str) + return NULL; + + /* find start of retunr type */ + p = strrchr(str, ')'); + if (!p) + return NULL; + + /* + * expansion factor estimated to 3x + */ + len = strlen(str) * 3 + 1; + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + if (!(flags & JAVA_DEMANGLE_NORET)) { + /* + * get return type first + */ + ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE); + if (!ptr) + goto error; + + /* add space between return type and function prototype */ + l1 = strlen(buf); + buf[l1++] = ' '; + } + + /* process function up to return type */ + ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX); + if (!ptr) + goto error; + + return buf; +error: + free(buf); + return NULL; +} diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h new file mode 100644 index 000000000000..a981c1f968fe --- /dev/null +++ b/tools/perf/util/demangle-java.h @@ -0,0 +1,10 @@ +#ifndef __PERF_DEMANGLE_JAVA +#define __PERF_DEMANGLE_JAVA 1 +/* + * demangle function flags + */ +#define JAVA_DEMANGLE_NORET 0x1 /* do not process return type */ + +char * java_demangle_sym(const char *str, int flags); + +#endif /* __PERF_DEMANGLE_JAVA */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 7c0c08386a1d..8e6395439ca0 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -52,6 +52,11 @@ int dso__read_binary_type_filename(const struct dso *dso, debuglink--; if (*debuglink == '/') debuglink++; + + ret = -1; + if (!is_regular_file(filename)) + break; + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } @@ -933,6 +938,7 @@ static struct dso *__dso__findlink_by_longname(struct rb_root *root, /* Add new node and rebalance tree */ rb_link_node(&dso->rb_node, parent, p); rb_insert_color(&dso->rb_node, root); + dso->root = root; } return NULL; } @@ -945,15 +951,30 @@ static inline struct dso *__dso__find_by_longname(struct rb_root *root, void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) { + struct rb_root *root = dso->root; + if (name == NULL) return; if (dso->long_name_allocated) free((char *)dso->long_name); + if (root) { + rb_erase(&dso->rb_node, root); + /* + * __dso__findlink_by_longname() isn't guaranteed to add it + * back, so a clean removal is required here. + */ + RB_CLEAR_NODE(&dso->rb_node); + dso->root = NULL; + } + dso->long_name = name; dso->long_name_len = strlen(name); dso->long_name_allocated = name_allocated; + + if (root) + __dso__findlink_by_longname(root, dso, NULL); } void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) @@ -1046,6 +1067,7 @@ struct dso *dso__new(const char *name) dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; RB_CLEAR_NODE(&dso->rb_node); + dso->root = NULL; INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); pthread_mutex_init(&dso->lock, NULL); @@ -1226,6 +1248,8 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name) if (dso != NULL) { __dsos__add(dsos, dso); dso__set_basename(dso); + /* Put dso here because __dsos_add already got it */ + dso__put(dso); } return dso; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index fc8db9c764ac..45ec4d0a50ed 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -135,6 +135,7 @@ struct dso { pthread_mutex_t lock; struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ + struct rb_root *root; /* root of rbtree that rb_node is in */ struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; struct { diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6af4f7c36820..49a11d9d8b8f 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -6,6 +6,8 @@ struct perf_env perf_env; void perf_env__exit(struct perf_env *env) { + int i; + zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -19,21 +21,16 @@ void perf_env__exit(struct perf_env *env) zfree(&env->numa_nodes); zfree(&env->pmu_mappings); zfree(&env->cpu); + + for (i = 0; i < env->caches_cnt; i++) + cpu_cache_level__free(&env->caches[i]); + zfree(&env->caches); } int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) { int i; - /* - * If env->cmdline_argv has already been set, do not override it. This allows - * a command to set the cmdline, parse args and then call another - * builtin function that implements a command -- e.g, cmd_kvm calling - * cmd_record. - */ - if (env->cmdline_argv != NULL) - return 0; - /* do not include NULL termination */ env->cmdline_argv = calloc(argc, sizeof(char *)); if (env->cmdline_argv == NULL) @@ -84,3 +81,10 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) env->nr_cpus_avail = nr_cpus; return 0; } + +void cpu_cache_level__free(struct cpu_cache_level *cache) +{ + free(cache->type); + free(cache->map); + free(cache->size); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 0132b9557c02..56cffb60a0b4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -1,11 +1,23 @@ #ifndef __PERF_ENV_H #define __PERF_ENV_H +#include <linux/types.h> + struct cpu_topology_map { int socket_id; int core_id; }; +struct cpu_cache_level { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + char *type; + char *size; + char *map; +}; + struct perf_env { char *hostname; char *os_release; @@ -31,6 +43,8 @@ struct perf_env { char *numa_nodes; char *pmu_mappings; struct cpu_topology_map *cpu; + struct cpu_cache_level *caches; + int caches_cnt; }; extern struct perf_env perf_env; @@ -41,4 +55,5 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); +void cpu_cache_level__free(struct cpu_cache_level *cache); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c deleted file mode 100644 index 7405123692f1..000000000000 --- a/tools/perf/util/environment.c +++ /dev/null @@ -1,8 +0,0 @@ -/* - * We put all the perf config variables in this same object - * file, so that programs can link against the config parser - * without having to link against all the rest of perf. - */ -#include "cache.h" - -int pager_use_color = 1; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 8b10621b415c..7bad5c3fa7b7 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -10,6 +10,8 @@ #include "thread.h" #include "thread_map.h" #include "symbol/kallsyms.h" +#include "asm/bug.h" +#include "stat.h" static const char *perf_event__names[] = { [0] = "TOTAL", @@ -37,6 +39,12 @@ static const char *perf_event__names[] = { [PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO", [PERF_RECORD_AUXTRACE] = "AUXTRACE", [PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR", + [PERF_RECORD_THREAD_MAP] = "THREAD_MAP", + [PERF_RECORD_CPU_MAP] = "CPU_MAP", + [PERF_RECORD_STAT_CONFIG] = "STAT_CONFIG", + [PERF_RECORD_STAT] = "STAT", + [PERF_RECORD_STAT_ROUND] = "STAT_ROUND", + [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", }; const char *perf_event__name(unsigned int id) @@ -274,7 +282,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, strcpy(execname, ""); /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n", + n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n", &event->mmap2.start, &event->mmap2.len, prot, &event->mmap2.pgoff, &event->mmap2.maj, &event->mmap2.min, @@ -495,7 +503,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, if (comm_event == NULL) goto out; - mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size); + mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); if (mmap_event == NULL) goto out_free_comm; @@ -569,7 +577,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (comm_event == NULL) goto out; - mmap_event = malloc(sizeof(mmap_event->mmap) + machine->id_hdr_size); + mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size); if (mmap_event == NULL) goto out_free_comm; @@ -699,6 +707,274 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return err; } +int perf_event__synthesize_thread_map2(struct perf_tool *tool, + struct thread_map *threads, + perf_event__handler_t process, + struct machine *machine) +{ + union perf_event *event; + int i, err, size; + + size = sizeof(event->thread_map); + size += threads->nr * sizeof(event->thread_map.entries[0]); + + event = zalloc(size); + if (!event) + return -ENOMEM; + + event->header.type = PERF_RECORD_THREAD_MAP; + event->header.size = size; + event->thread_map.nr = threads->nr; + + for (i = 0; i < threads->nr; i++) { + struct thread_map_event_entry *entry = &event->thread_map.entries[i]; + char *comm = thread_map__comm(threads, i); + + if (!comm) + comm = (char *) ""; + + entry->pid = thread_map__pid(threads, i); + strncpy((char *) &entry->comm, comm, sizeof(entry->comm)); + } + + err = process(tool, event, NULL, machine); + + free(event); + return err; +} + +static void synthesize_cpus(struct cpu_map_entries *cpus, + struct cpu_map *map) +{ + int i; + + cpus->nr = map->nr; + + for (i = 0; i < map->nr; i++) + cpus->cpu[i] = map->map[i]; +} + +static void synthesize_mask(struct cpu_map_mask *mask, + struct cpu_map *map, int max) +{ + int i; + + mask->nr = BITS_TO_LONGS(max); + mask->long_size = sizeof(long); + + for (i = 0; i < map->nr; i++) + set_bit(map->map[i], mask->mask); +} + +static size_t cpus_size(struct cpu_map *map) +{ + return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); +} + +static size_t mask_size(struct cpu_map *map, int *max) +{ + int i; + + *max = 0; + + for (i = 0; i < map->nr; i++) { + /* bit possition of the cpu is + 1 */ + int bit = map->map[i] + 1; + + if (bit > *max) + *max = bit; + } + + return sizeof(struct cpu_map_mask) + BITS_TO_LONGS(*max) * sizeof(long); +} + +void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max) +{ + size_t size_cpus, size_mask; + bool is_dummy = cpu_map__empty(map); + + /* + * Both array and mask data have variable size based + * on the number of cpus and their actual values. + * The size of the 'struct cpu_map_data' is: + * + * array = size of 'struct cpu_map_entries' + + * number of cpus * sizeof(u64) + * + * mask = size of 'struct cpu_map_mask' + + * maximum cpu bit converted to size of longs + * + * and finaly + the size of 'struct cpu_map_data'. + */ + size_cpus = cpus_size(map); + size_mask = mask_size(map, max); + + if (is_dummy || (size_cpus < size_mask)) { + *size += size_cpus; + *type = PERF_CPU_MAP__CPUS; + } else { + *size += size_mask; + *type = PERF_CPU_MAP__MASK; + } + + *size += sizeof(struct cpu_map_data); + return zalloc(*size); +} + +void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map, + u16 type, int max) +{ + data->type = type; + + switch (type) { + case PERF_CPU_MAP__CPUS: + synthesize_cpus((struct cpu_map_entries *) data->data, map); + break; + case PERF_CPU_MAP__MASK: + synthesize_mask((struct cpu_map_mask *) data->data, map, max); + default: + break; + }; +} + +static struct cpu_map_event* cpu_map_event__new(struct cpu_map *map) +{ + size_t size = sizeof(struct cpu_map_event); + struct cpu_map_event *event; + int max; + u16 type; + + event = cpu_map_data__alloc(map, &size, &type, &max); + if (!event) + return NULL; + + event->header.type = PERF_RECORD_CPU_MAP; + event->header.size = size; + event->data.type = type; + + cpu_map_data__synthesize(&event->data, map, type, max); + return event; +} + +int perf_event__synthesize_cpu_map(struct perf_tool *tool, + struct cpu_map *map, + perf_event__handler_t process, + struct machine *machine) +{ + struct cpu_map_event *event; + int err; + + event = cpu_map_event__new(map); + if (!event) + return -ENOMEM; + + err = process(tool, (union perf_event *) event, NULL, machine); + + free(event); + return err; +} + +int perf_event__synthesize_stat_config(struct perf_tool *tool, + struct perf_stat_config *config, + perf_event__handler_t process, + struct machine *machine) +{ + struct stat_config_event *event; + int size, i = 0, err; + + size = sizeof(*event); + size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0])); + + event = zalloc(size); + if (!event) + return -ENOMEM; + + event->header.type = PERF_RECORD_STAT_CONFIG; + event->header.size = size; + event->nr = PERF_STAT_CONFIG_TERM__MAX; + +#define ADD(__term, __val) \ + event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \ + event->data[i].val = __val; \ + i++; + + ADD(AGGR_MODE, config->aggr_mode) + ADD(INTERVAL, config->interval) + ADD(SCALE, config->scale) + + WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX, + "stat config terms unbalanced\n"); +#undef ADD + + err = process(tool, (union perf_event *) event, NULL, machine); + + free(event); + return err; +} + +int perf_event__synthesize_stat(struct perf_tool *tool, + u32 cpu, u32 thread, u64 id, + struct perf_counts_values *count, + perf_event__handler_t process, + struct machine *machine) +{ + struct stat_event event; + + event.header.type = PERF_RECORD_STAT; + event.header.size = sizeof(event); + event.header.misc = 0; + + event.id = id; + event.cpu = cpu; + event.thread = thread; + event.val = count->val; + event.ena = count->ena; + event.run = count->run; + + return process(tool, (union perf_event *) &event, NULL, machine); +} + +int perf_event__synthesize_stat_round(struct perf_tool *tool, + u64 evtime, u64 type, + perf_event__handler_t process, + struct machine *machine) +{ + struct stat_round_event event; + + event.header.type = PERF_RECORD_STAT_ROUND; + event.header.size = sizeof(event); + event.header.misc = 0; + + event.time = evtime; + event.type = type; + + return process(tool, (union perf_event *) &event, NULL, machine); +} + +void perf_event__read_stat_config(struct perf_stat_config *config, + struct stat_config_event *event) +{ + unsigned i; + + for (i = 0; i < event->nr; i++) { + + switch (event->data[i].tag) { +#define CASE(__term, __val) \ + case PERF_STAT_CONFIG_TERM__##__term: \ + config->__val = event->data[i].val; \ + break; + + CASE(AGGR_MODE, aggr_mode) + CASE(SCALE, scale) + CASE(INTERVAL, interval) +#undef CASE + default: + pr_warning("unknown stat config term %" PRIu64 "\n", + event->data[i].tag); + } + } +} + size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) { const char *s; @@ -783,6 +1059,38 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp) event->mmap2.filename); } +size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp) +{ + struct thread_map *threads = thread_map__new_event(&event->thread_map); + size_t ret; + + ret = fprintf(fp, " nr: "); + + if (threads) + ret += thread_map__fprintf(threads, fp); + else + ret += fprintf(fp, "failed to get threads from event\n"); + + thread_map__put(threads); + return ret; +} + +size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp) +{ + struct cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data); + size_t ret; + + ret = fprintf(fp, " nr: "); + + if (cpus) + ret += cpu_map__fprintf(cpus, fp); + else + ret += fprintf(fp, "failed to get cpumap from event\n"); + + cpu_map__put(cpus); + return ret; +} + int perf_event__process_mmap(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index a0dbcbd4f6d8..b7ffb7ee9971 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -226,6 +226,12 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_AUXTRACE_INFO = 70, PERF_RECORD_AUXTRACE = 71, PERF_RECORD_AUXTRACE_ERROR = 72, + PERF_RECORD_THREAD_MAP = 73, + PERF_RECORD_CPU_MAP = 74, + PERF_RECORD_STAT_CONFIG = 75, + PERF_RECORD_STAT = 76, + PERF_RECORD_STAT_ROUND = 77, + PERF_RECORD_EVENT_UPDATE = 78, PERF_RECORD_HEADER_MAX }; @@ -270,12 +276,61 @@ struct events_stats { u32 nr_proc_map_timeout; }; +enum { + PERF_CPU_MAP__CPUS = 0, + PERF_CPU_MAP__MASK = 1, +}; + +struct cpu_map_entries { + u16 nr; + u16 cpu[]; +}; + +struct cpu_map_mask { + u16 nr; + u16 long_size; + unsigned long mask[]; +}; + +struct cpu_map_data { + u16 type; + char data[]; +}; + +struct cpu_map_event { + struct perf_event_header header; + struct cpu_map_data data; +}; + struct attr_event { struct perf_event_header header; struct perf_event_attr attr; u64 id[]; }; +enum { + PERF_EVENT_UPDATE__UNIT = 0, + PERF_EVENT_UPDATE__SCALE = 1, + PERF_EVENT_UPDATE__NAME = 2, + PERF_EVENT_UPDATE__CPUS = 3, +}; + +struct event_update_event_cpus { + struct cpu_map_data cpus; +}; + +struct event_update_event_scale { + double scale; +}; + +struct event_update_event { + struct perf_event_header header; + u64 type; + u64 id; + + char data[]; +}; + #define MAX_EVENT_NAME 64 struct perf_trace_event_type { @@ -356,6 +411,63 @@ struct context_switch_event { u32 next_prev_tid; }; +struct thread_map_event_entry { + u64 pid; + char comm[16]; +}; + +struct thread_map_event { + struct perf_event_header header; + u64 nr; + struct thread_map_event_entry entries[]; +}; + +enum { + PERF_STAT_CONFIG_TERM__AGGR_MODE = 0, + PERF_STAT_CONFIG_TERM__INTERVAL = 1, + PERF_STAT_CONFIG_TERM__SCALE = 2, + PERF_STAT_CONFIG_TERM__MAX = 3, +}; + +struct stat_config_event_entry { + u64 tag; + u64 val; +}; + +struct stat_config_event { + struct perf_event_header header; + u64 nr; + struct stat_config_event_entry data[]; +}; + +struct stat_event { + struct perf_event_header header; + + u64 id; + u32 cpu; + u32 thread; + + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +enum { + PERF_STAT_ROUND_TYPE__INTERVAL = 0, + PERF_STAT_ROUND_TYPE__FINAL = 1, +}; + +struct stat_round_event { + struct perf_event_header header; + u64 type; + u64 time; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -368,6 +480,7 @@ union perf_event { struct throttle_event throttle; struct sample_event sample; struct attr_event attr; + struct event_update_event event_update; struct event_type_event event_type; struct tracing_data_event tracing_data; struct build_id_event build_id; @@ -378,12 +491,20 @@ union perf_event { struct aux_event aux; struct itrace_start_event itrace_start; struct context_switch_event context_switch; + struct thread_map_event thread_map; + struct cpu_map_event cpu_map; + struct stat_config_event stat_config; + struct stat_event stat; + struct stat_round_event stat_round; }; void perf_event__print_totals(void); struct perf_tool; struct thread_map; +struct cpu_map; +struct perf_stat_config; +struct perf_counts_values; typedef int (*perf_event__handler_t)(struct perf_tool *tool, union perf_event *event, @@ -395,6 +516,14 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, unsigned int proc_map_timeout); +int perf_event__synthesize_thread_map2(struct perf_tool *tool, + struct thread_map *threads, + perf_event__handler_t process, + struct machine *machine); +int perf_event__synthesize_cpu_map(struct perf_tool *tool, + struct cpu_map *cpus, + perf_event__handler_t process, + struct machine *machine); int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine, bool mmap_data, @@ -402,7 +531,21 @@ int perf_event__synthesize_threads(struct perf_tool *tool, int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); - +int perf_event__synthesize_stat_config(struct perf_tool *tool, + struct perf_stat_config *config, + perf_event__handler_t process, + struct machine *machine); +void perf_event__read_stat_config(struct perf_stat_config *config, + struct stat_config_event *event); +int perf_event__synthesize_stat(struct perf_tool *tool, + u32 cpu, u32 thread, u64 id, + struct perf_counts_values *count, + perf_event__handler_t process, + struct machine *machine); +int perf_event__synthesize_stat_round(struct perf_tool *tool, + u64 time, u64 type, + perf_event__handler_t process, + struct machine *machine); int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); @@ -499,9 +642,14 @@ size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, const char *symbol_name); +void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max); +void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map, + u16 type, int max); #endif /* __PERF_RECORD_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d1392194a9a9..86a03836a83f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -18,7 +18,7 @@ #include <unistd.h> #include "parse-events.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include <sys/mman.h> @@ -68,6 +68,18 @@ struct perf_evlist *perf_evlist__new_default(void) return evlist; } +struct perf_evlist *perf_evlist__new_dummy(void) +{ + struct perf_evlist *evlist = perf_evlist__new(); + + if (evlist && perf_evlist__add_dummy(evlist)) { + perf_evlist__delete(evlist); + evlist = NULL; + } + + return evlist; +} + /** * perf_evlist__set_id_pos - set the positions of event ids. * @evlist: selected event list @@ -248,6 +260,22 @@ error: return -ENOMEM; } +int perf_evlist__add_dummy(struct perf_evlist *evlist) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_DUMMY, + .size = sizeof(attr), /* to capture ABI version */ + }; + struct perf_evsel *evsel = perf_evsel__new(&attr); + + if (evsel == NULL) + return -ENOMEM; + + perf_evlist__add(evlist, evsel); + return 0; +} + static int perf_evlist__add_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) { @@ -336,20 +364,12 @@ static int perf_evlist__nr_threads(struct perf_evlist *evlist, void perf_evlist__disable(struct perf_evlist *evlist) { - int cpu, thread; struct perf_evsel *pos; - int nr_cpus = cpu_map__nr(evlist->cpus); - int nr_threads; - for (cpu = 0; cpu < nr_cpus; cpu++) { - evlist__for_each(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->fd) - continue; - nr_threads = perf_evlist__nr_threads(evlist, pos); - for (thread = 0; thread < nr_threads; thread++) - ioctl(FD(pos, cpu, thread), - PERF_EVENT_IOC_DISABLE, 0); - } + evlist__for_each(evlist, pos) { + if (!perf_evsel__is_group_leader(pos) || !pos->fd) + continue; + perf_evsel__disable(pos); } evlist->enabled = false; @@ -357,20 +377,12 @@ void perf_evlist__disable(struct perf_evlist *evlist) void perf_evlist__enable(struct perf_evlist *evlist) { - int cpu, thread; struct perf_evsel *pos; - int nr_cpus = cpu_map__nr(evlist->cpus); - int nr_threads; - for (cpu = 0; cpu < nr_cpus; cpu++) { - evlist__for_each(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->fd) - continue; - nr_threads = perf_evlist__nr_threads(evlist, pos); - for (thread = 0; thread < nr_threads; thread++) - ioctl(FD(pos, cpu, thread), - PERF_EVENT_IOC_ENABLE, 0); - } + evlist__for_each(evlist, pos) { + if (!perf_evsel__is_group_leader(pos) || !pos->fd) + continue; + perf_evsel__enable(pos); } evlist->enabled = true; @@ -381,48 +393,6 @@ void perf_evlist__toggle_enable(struct perf_evlist *evlist) (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); } -int perf_evlist__disable_event(struct perf_evlist *evlist, - struct perf_evsel *evsel) -{ - int cpu, thread, err; - int nr_cpus = cpu_map__nr(evlist->cpus); - int nr_threads = perf_evlist__nr_threads(evlist, evsel); - - if (!evsel->fd) - return 0; - - for (cpu = 0; cpu < nr_cpus; cpu++) { - for (thread = 0; thread < nr_threads; thread++) { - err = ioctl(FD(evsel, cpu, thread), - PERF_EVENT_IOC_DISABLE, 0); - if (err) - return err; - } - } - return 0; -} - -int perf_evlist__enable_event(struct perf_evlist *evlist, - struct perf_evsel *evsel) -{ - int cpu, thread, err; - int nr_cpus = cpu_map__nr(evlist->cpus); - int nr_threads = perf_evlist__nr_threads(evlist, evsel); - - if (!evsel->fd) - return -EINVAL; - - for (cpu = 0; cpu < nr_cpus; cpu++) { - for (thread = 0; thread < nr_threads; thread++) { - err = ioctl(FD(evsel, cpu, thread), - PERF_EVENT_IOC_ENABLE, 0); - if (err) - return err; - } - } - return 0; -} - static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu) { @@ -550,9 +520,9 @@ void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, evsel->id[evsel->ids++] = id; } -static int perf_evlist__id_add_fd(struct perf_evlist *evlist, - struct perf_evsel *evsel, - int cpu, int thread, int fd) +int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd) { u64 read_data[4] = { 0, }; int id_idx = 1; /* The first entry is the counter value */ @@ -1211,12 +1181,12 @@ void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, */ if (cpus != evlist->cpus) { cpu_map__put(evlist->cpus); - evlist->cpus = cpus; + evlist->cpus = cpu_map__get(cpus); } if (threads != evlist->threads) { thread_map__put(evlist->threads); - evlist->threads = threads; + evlist->threads = thread_map__get(threads); } perf_evlist__propagate_maps(evlist); @@ -1253,6 +1223,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) int err = 0; evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + continue; + err = perf_evsel__set_filter(evsel, filter); if (err) break; @@ -1486,7 +1459,7 @@ int perf_evlist__open(struct perf_evlist *evlist) perf_evlist__update_id_pos(evlist); evlist__for_each(evlist, evsel) { - err = perf_evsel__open(evsel, evlist->cpus, evlist->threads); + err = perf_evsel__open(evsel, evsel->cpus, evsel->threads); if (err < 0) goto out_err; } @@ -1654,7 +1627,7 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) return printed + fprintf(fp, "\n"); } -int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, +int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size) { int printed, value; @@ -1682,7 +1655,25 @@ int perf_evlist__strerror_open(struct perf_evlist *evlist __maybe_unused, "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n" "Hint:\tThe current value is %d.", value); break; + case EINVAL: { + struct perf_evsel *first = perf_evlist__first(evlist); + int max_freq; + + if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0) + goto out_default; + + if (first->attr.sample_freq < (u64)max_freq) + goto out_default; + + printed = scnprintf(buf, size, + "Error:\t%s.\n" + "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n" + "Hint:\tThe current value is %d and %" PRIu64 " is being requested.", + emsg, max_freq, first->attr.sample_freq); + break; + } default: +out_default: scnprintf(buf, size, "%s", emsg); break; } @@ -1753,3 +1744,19 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, tracking_evsel->tracking = true; } + +struct perf_evsel * +perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, + const char *str) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (!evsel->name) + continue; + if (strcmp(str, evsel->name) == 0) + return evsel; + } + + return NULL; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index a459fe71b452..a0d15221db6e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -67,6 +67,7 @@ struct perf_evsel_str_handler { struct perf_evlist *perf_evlist__new(void); struct perf_evlist *perf_evlist__new_default(void); +struct perf_evlist *perf_evlist__new_dummy(void); void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus, struct thread_map *threads); void perf_evlist__exit(struct perf_evlist *evlist); @@ -81,6 +82,8 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, #define perf_evlist__add_default_attrs(evlist, array) \ __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) +int perf_evlist__add_dummy(struct perf_evlist *evlist); + int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); @@ -97,6 +100,9 @@ perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist, void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id); +int perf_evlist__id_add_fd(struct perf_evlist *evlist, + struct perf_evsel *evsel, + int cpu, int thread, int fd); int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd); int perf_evlist__alloc_pollfd(struct perf_evlist *evlist); @@ -149,10 +155,6 @@ void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); void perf_evlist__toggle_enable(struct perf_evlist *evlist); -int perf_evlist__disable_event(struct perf_evlist *evlist, - struct perf_evsel *evsel); -int perf_evlist__enable_event(struct perf_evlist *evlist, - struct perf_evsel *evsel); int perf_evlist__enable_event_idx(struct perf_evlist *evlist, struct perf_evsel *evsel, int idx); @@ -292,4 +294,7 @@ void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); + +struct perf_evsel * +perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 397fb4ed3c97..0902fe418754 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ static struct { bool cloexec; bool clockid; bool clockid_wrong; + bool lbr_flags; } perf_missing_features; static clockid_t clockid; @@ -224,6 +225,11 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) if (evsel != NULL) perf_evsel__init(evsel, attr, idx); + if (perf_evsel__is_bpf_output(evsel)) { + evsel->attr.sample_type |= PERF_SAMPLE_RAW; + evsel->attr.sample_period = 1; + } + return evsel; } @@ -574,7 +580,9 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, } else { perf_evsel__set_sample_bit(evsel, BRANCH_STACK); attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | - PERF_SAMPLE_BRANCH_CALL_STACK; + PERF_SAMPLE_BRANCH_CALL_STACK | + PERF_SAMPLE_BRANCH_NO_CYCLES | + PERF_SAMPLE_BRANCH_NO_FLAGS; } } else pr_warning("Cannot use LBR callstack with branch stack. " @@ -895,6 +903,16 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (evsel->precise_max) perf_event_attr__set_max_precise_ip(attr); + if (opts->all_user) { + attr->exclude_kernel = 1; + attr->exclude_user = 0; + } + + if (opts->all_kernel) { + attr->exclude_kernel = 0; + attr->exclude_user = 1; + } + /* * Apply event specific term settings, * it overloads any global configuration. @@ -981,13 +999,26 @@ int perf_evsel__append_filter(struct perf_evsel *evsel, return -1; } -int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) +int perf_evsel__enable(struct perf_evsel *evsel) { + int nthreads = thread_map__nr(evsel->threads); + int ncpus = cpu_map__nr(evsel->cpus); + return perf_evsel__run_ioctl(evsel, ncpus, nthreads, PERF_EVENT_IOC_ENABLE, 0); } +int perf_evsel__disable(struct perf_evsel *evsel) +{ + int nthreads = thread_map__nr(evsel->threads); + int ncpus = cpu_map__nr(evsel->cpus); + + return perf_evsel__run_ioctl(evsel, ncpus, nthreads, + PERF_EVENT_IOC_DISABLE, + 0); +} + int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) { if (ncpus == 0 || nthreads == 0) @@ -1192,6 +1223,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), + bit_name(WEIGHT), { .name = NULL, } }; #undef bit_name @@ -1323,6 +1355,9 @@ fallback_missing_features: evsel->attr.mmap2 = 0; if (perf_missing_features.exclude_guest) evsel->attr.exclude_guest = evsel->attr.exclude_host = 0; + if (perf_missing_features.lbr_flags) + evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES); retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; @@ -1441,6 +1476,12 @@ try_fallback: } else if (!perf_missing_features.sample_id_all) { perf_missing_features.sample_id_all = true; goto retry_sample_id; + } else if (!perf_missing_features.lbr_flags && + (evsel->attr.branch_sample_type & + (PERF_SAMPLE_BRANCH_NO_CYCLES | + PERF_SAMPLE_BRANCH_NO_FLAGS))) { + perf_missing_features.lbr_flags = true; + goto fallback_missing_features; } out_close: @@ -2272,6 +2313,29 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += comma_fprintf(fp, &first, " %s=%" PRIu64, term, (u64)evsel->attr.sample_freq); } + + if (details->trace_fields) { + struct format_field *field; + + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { + printed += comma_fprintf(fp, &first, " (not a tracepoint)"); + goto out; + } + + field = evsel->tp_format->format.fields; + if (field == NULL) { + printed += comma_fprintf(fp, &first, " (no trace field)"); + goto out; + } + + printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name); + + field = field->next; + while (field) { + printed += comma_fprintf(fp, &first, "%s", field->name); + field = field->next; + } + } out: fputc('\n', fp); return ++printed; @@ -2313,12 +2377,15 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, case EPERM: case EACCES: return scnprintf(msg, size, - "You may not have permission to collect %sstats.\n" - "Consider tweaking /proc/sys/kernel/perf_event_paranoid:\n" - " -1 - Not paranoid at all\n" - " 0 - Disallow raw tracepoint access for unpriv\n" - " 1 - Disallow cpu events for unpriv\n" - " 2 - Disallow kernel profiling for unpriv", + "You may not have permission to collect %sstats.\n\n" + "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" + "which controls use of the performance events system by\n" + "unprivileged users (without CAP_SYS_ADMIN).\n\n" + "The default value is 1:\n\n" + " -1: Allow use of (almost) all events by all users\n" + ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" + ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" + ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", target->system_wide ? "system-wide " : ""); case ENOENT: return scnprintf(msg, size, "The %s event is not supported.", diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0e49bd742c63..501ea6e565f1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -93,10 +93,8 @@ struct perf_evsel { const char *unit; struct event_format *tp_format; off_t id_offset; - union { - void *priv; - u64 db_id; - }; + void *priv; + u64 db_id; struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; @@ -227,7 +225,8 @@ int perf_evsel__append_filter(struct perf_evsel *evsel, const char *op, const char *filter); int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, const char *filter); -int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); +int perf_evsel__enable(struct perf_evsel *evsel); +int perf_evsel__disable(struct perf_evsel *evsel); int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus); @@ -363,11 +362,20 @@ static inline bool perf_evsel__is_function_event(struct perf_evsel *evsel) #undef FUNCTION_EVENT } +static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel) +{ + struct perf_event_attr *attr = &evsel->attr; + + return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) && + (attr->type == PERF_TYPE_SOFTWARE); +} + struct perf_attr_details { bool freq; bool verbose; bool event_group; bool force; + bool trace_fields; }; int perf_evsel__fprintf(struct perf_evsel *evsel, diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c deleted file mode 100644 index 7adf4ad15d8f..000000000000 --- a/tools/perf/util/exec_cmd.c +++ /dev/null @@ -1,148 +0,0 @@ -#include "cache.h" -#include "exec_cmd.h" -#include "quote.h" - -#include <string.h> - -#define MAX_ARGS 32 - -static const char *argv_exec_path; -static const char *argv0_path; - -const char *system_path(const char *path) -{ - static const char *prefix = PREFIX; - struct strbuf d = STRBUF_INIT; - - if (is_absolute_path(path)) - return path; - - strbuf_addf(&d, "%s/%s", prefix, path); - path = strbuf_detach(&d, NULL); - return path; -} - -const char *perf_extract_argv0_path(const char *argv0) -{ - const char *slash; - - if (!argv0 || !*argv0) - return NULL; - slash = argv0 + strlen(argv0); - - while (argv0 <= slash && !is_dir_sep(*slash)) - slash--; - - if (slash >= argv0) { - argv0_path = strndup(argv0, slash - argv0); - return argv0_path ? slash + 1 : NULL; - } - - return argv0; -} - -void perf_set_argv_exec_path(const char *exec_path) -{ - argv_exec_path = exec_path; - /* - * Propagate this setting to external programs. - */ - setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); -} - - -/* Returns the highest-priority, location to look for perf programs. */ -const char *perf_exec_path(void) -{ - const char *env; - - if (argv_exec_path) - return argv_exec_path; - - env = getenv(EXEC_PATH_ENVIRONMENT); - if (env && *env) { - return env; - } - - return system_path(PERF_EXEC_PATH); -} - -static void add_path(struct strbuf *out, const char *path) -{ - if (path && *path) { - if (is_absolute_path(path)) - strbuf_addstr(out, path); - else - strbuf_addstr(out, make_nonrelative_path(path)); - - strbuf_addch(out, PATH_SEP); - } -} - -void setup_path(void) -{ - const char *old_path = getenv("PATH"); - struct strbuf new_path = STRBUF_INIT; - - add_path(&new_path, perf_exec_path()); - add_path(&new_path, argv0_path); - - if (old_path) - strbuf_addstr(&new_path, old_path); - else - strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); - - setenv("PATH", new_path.buf, 1); - - strbuf_release(&new_path); -} - -static const char **prepare_perf_cmd(const char **argv) -{ - int argc; - const char **nargv; - - for (argc = 0; argv[argc]; argc++) - ; /* just counting */ - nargv = malloc(sizeof(*nargv) * (argc + 2)); - - nargv[0] = "perf"; - for (argc = 0; argv[argc]; argc++) - nargv[argc + 1] = argv[argc]; - nargv[argc + 1] = NULL; - return nargv; -} - -int execv_perf_cmd(const char **argv) { - const char **nargv = prepare_perf_cmd(argv); - - /* execvp() can only ever return if it fails */ - execvp("perf", (char **)nargv); - - free(nargv); - return -1; -} - - -int execl_perf_cmd(const char *cmd,...) -{ - int argc; - const char *argv[MAX_ARGS + 1]; - const char *arg; - va_list param; - - va_start(param, cmd); - argv[0] = cmd; - argc = 1; - while (argc < MAX_ARGS) { - arg = argv[argc++] = va_arg(param, char *); - if (!arg) - break; - } - va_end(param); - if (MAX_ARGS <= argc) - return error("too many args to run %s", cmd); - - argv[argc] = NULL; - return execv_perf_cmd(argv); -} diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h deleted file mode 100644 index bc4b915963f5..000000000000 --- a/tools/perf/util/exec_cmd.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef __PERF_EXEC_CMD_H -#define __PERF_EXEC_CMD_H - -extern void perf_set_argv_exec_path(const char *exec_path); -extern const char *perf_extract_argv0_path(const char *path); -extern const char *perf_exec_path(void); -extern void setup_path(void); -extern int execv_perf_cmd(const char **argv); /* NULL terminated */ -extern int execl_perf_cmd(const char *cmd, ...); -extern const char *system_path(const char *path); - -#endif /* __PERF_EXEC_CMD_H */ diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c new file mode 100644 index 000000000000..c1ef805c6a8f --- /dev/null +++ b/tools/perf/util/genelf.c @@ -0,0 +1,449 @@ +/* + * genelf.c + * Copyright (C) 2014, Google, Inc + * + * Contributed by: + * Stephane Eranian <eranian@gmail.com> + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include <sys/types.h> +#include <stdio.h> +#include <getopt.h> +#include <stddef.h> +#include <libelf.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <err.h> +#include <dwarf.h> + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define JVMTI + +#define BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef HAVE_LIBCRYPTO + +#define BUILD_ID_MD5 +#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ +#undef BUILD_ID_URANDOM /* different uuid for each run */ + +#ifdef BUILD_ID_SHA +#include <openssl/sha.h> +#endif + +#ifdef BUILD_ID_MD5 +#include <openssl/md5.h> +#endif +#endif + + +typedef struct { + unsigned int namesz; /* Size of entry's owner string */ + unsigned int descsz; /* Size of the note descriptor */ + unsigned int type; /* Interpretation of the descriptor */ + char name[0]; /* Start of the name+desc data */ +} Elf_Note; + +struct options { + char *output; + int fd; +}; + +static char shd_string_table[] = { + 0, + '.', 't', 'e', 'x', 't', 0, /* 1 */ + '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /* 7 */ + '.', 's', 'y', 'm', 't', 'a', 'b', 0, /* 17 */ + '.', 's', 't', 'r', 't', 'a', 'b', 0, /* 25 */ + '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */ + '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */ +}; + +static struct buildid_note { + Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */ + char name[4]; /* GNU\0 */ + char build_id[20]; +} bnote; + +static Elf_Sym symtab[]={ + /* symbol 0 MUST be the undefined symbol */ + { .st_name = 0, /* index in sym_string table */ + .st_info = ELF_ST_TYPE(STT_NOTYPE), + .st_shndx = 0, /* for now */ + .st_value = 0x0, + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, + }, + { .st_name = 1, /* index in sym_string table */ + .st_info = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC), + .st_shndx = 1, + .st_value = 0, /* for now */ + .st_other = ELF_ST_VIS(STV_DEFAULT), + .st_size = 0, /* for now */ + } +}; + +#ifdef BUILD_ID_URANDOM +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code __maybe_unused, + size_t csize __maybe_unused) +{ + int fd; + size_t sz = sizeof(note->build_id); + ssize_t sret; + + fd = open("/dev/urandom", O_RDONLY); + if (fd == -1) + err(1, "cannot access /dev/urandom for builid"); + + sret = read(fd, note->build_id, sz); + + close(fd); + + if (sret != (ssize_t)sz) + memset(note->build_id, 0, sz); +} +#endif + +#ifdef BUILD_ID_SHA +static void +gen_build_id(struct buildid_note *note, + unsigned long load_addr __maybe_unused, + const void *code, + size_t csize) +{ + if (sizeof(note->build_id) < SHA_DIGEST_LENGTH) + errx(1, "build_id too small for SHA1"); + + SHA1(code, csize, (unsigned char *)note->build_id); +} +#endif + +#ifdef BUILD_ID_MD5 +static void +gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize) +{ + MD5_CTX context; + + if (sizeof(note->build_id) < 16) + errx(1, "build_id too small for MD5"); + + MD5_Init(&context); + MD5_Update(&context, &load_addr, sizeof(load_addr)); + MD5_Update(&context, code, csize); + MD5_Final((unsigned char *)note->build_id, &context); +} +#endif + +/* + * fd: file descriptor open for writing for the output file + * load_addr: code load address (could be zero, just used for buildid) + * sym: function name (for native code - used as the symbol) + * code: the native code + * csize: the code size in bytes + */ +int +jit_write_elf(int fd, uint64_t load_addr, const char *sym, + const void *code, int csize, + void *debug, int nr_debug_entries) +{ + Elf *e; + Elf_Data *d; + Elf_Scn *scn; + Elf_Ehdr *ehdr; + Elf_Shdr *shdr; + char *strsym = NULL; + int symlen; + int retval = -1; + + if (elf_version(EV_CURRENT) == EV_NONE) { + warnx("ELF initialization failed"); + return -1; + } + + e = elf_begin(fd, ELF_C_WRITE, NULL); + if (!e) { + warnx("elf_begin failed"); + goto error; + } + + /* + * setup ELF header + */ + ehdr = elf_newehdr(e); + if (!ehdr) { + warnx("cannot get ehdr"); + goto error; + } + + ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN; + ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS; + ehdr->e_machine = GEN_ELF_ARCH; + ehdr->e_type = ET_DYN; + ehdr->e_entry = GEN_ELF_TEXT_OFFSET; + ehdr->e_version = EV_CURRENT; + ehdr->e_shstrndx= 2; /* shdr index for section name */ + + /* + * setup text section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 16; + d->d_off = 0LL; + d->d_buf = (void *)code; + d->d_type = ELF_T_BYTE; + d->d_size = csize; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 1; + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = GEN_ELF_TEXT_OFFSET; + shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + shdr->sh_entsize = 0; + + /* + * setup section headers string table + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = shd_string_table; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(shd_string_table); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup symtab section + */ + symtab[1].st_size = csize; + symtab[1].st_value = GEN_ELF_TEXT_OFFSET; + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 8; + d->d_off = 0LL; + d->d_buf = symtab; + d->d_type = ELF_T_SYM; + d->d_size = sizeof(symtab); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */ + shdr->sh_type = SHT_SYMTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = sizeof(Elf_Sym); + shdr->sh_link = 4; /* index of .strtab section */ + + /* + * setup symbols string table + * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry + */ + symlen = 2 + strlen(sym); + strsym = calloc(1, symlen); + if (!strsym) { + warnx("cannot allocate strsym"); + goto error; + } + strcpy(strsym + 1, sym); + + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = strsym; + d->d_type = ELF_T_BYTE; + d->d_size = symlen; + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 25; /* offset in shd_string_table */ + shdr->sh_type = SHT_STRTAB; + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup build-id section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + goto error; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + goto error; + } + + /* + * build-id generation + */ + gen_build_id(&bnote, load_addr, code, csize); + bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ + bnote.desc.descsz = sizeof(bnote.build_id); + bnote.desc.type = NT_GNU_BUILD_ID; + strcpy(bnote.name, "GNU"); + + d->d_align = 4; + d->d_off = 0LL; + d->d_buf = &bnote; + d->d_type = ELF_T_BYTE; + d->d_size = sizeof(bnote); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + goto error; + } + + shdr->sh_name = 33; /* offset in shd_string_table */ + shdr->sh_type = SHT_NOTE; + shdr->sh_addr = 0x0; + shdr->sh_flags = SHF_ALLOC; + shdr->sh_size = sizeof(bnote); + shdr->sh_entsize = 0; + + if (debug && nr_debug_entries) { + retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); + if (retval) + goto error; + } else { + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update 4 failed"); + goto error; + } + } + + retval = 0; +error: + (void)elf_end(e); + + free(strsym); + + + return retval; +} + +#ifndef JVMTI + +static unsigned char x86_code[] = { + 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */ + 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */ + 0xCD, 0x80 /* int $0x80 */ +}; + +static struct options options; + +int main(int argc, char **argv) +{ + int c, fd, ret; + + while ((c = getopt(argc, argv, "o:h")) != -1) { + switch (c) { + case 'o': + options.output = optarg; + break; + case 'h': + printf("Usage: genelf -o output_file [-h]\n"); + return 0; + default: + errx(1, "unknown option"); + } + } + + fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666); + if (fd == -1) + err(1, "cannot create file %s", options.output); + + ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code)); + close(fd); + + if (ret != 0) + unlink(options.output); + + return ret; +} +#endif diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h new file mode 100644 index 000000000000..45bf9c6d3257 --- /dev/null +++ b/tools/perf/util/genelf.h @@ -0,0 +1,67 @@ +#ifndef __GENELF_H__ +#define __GENELF_H__ + +/* genelf.c */ +extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym, + const void *code, int csize, + void *debug, int nr_debug_entries); +/* genelf_debug.c */ +extern int jit_add_debug_info(Elf *e, uint64_t code_addr, + void *debug, int nr_debug_entries); + +#if defined(__arm__) +#define GEN_ELF_ARCH EM_ARM +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__aarch64__) +#define GEN_ELF_ARCH EM_AARCH64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__x86_64__) +#define GEN_ELF_ARCH EM_X86_64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__i386__) +#define GEN_ELF_ARCH EM_386 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__ppcle__) +#define GEN_ELF_ARCH EM_PPC +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpc__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_ENDIAN ELFDATA2MSB +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__powerpcle__) +#define GEN_ELF_ARCH EM_PPC64 +#define GEN_ELF_ENDIAN ELFDATA2LSB +#define GEN_ELF_CLASS ELFCLASS64 +#else +#error "unsupported architecture" +#endif + +#if GEN_ELF_CLASS == ELFCLASS64 +#define elf_newehdr elf64_newehdr +#define elf_getshdr elf64_getshdr +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF64_ST_BIND(a) +#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a) +#else +#define elf_newehdr elf32_newehdr +#define elf_getshdr elf32_getshdr +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a) +#define ELF_ST_BIND(a) ELF32_ST_BIND(a) +#define ELF_ST_VIS(a) ELF32_ST_VISIBILITY(a) +#endif + +/* The .text section is directly after the ELF header */ +#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr) + +#endif diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c new file mode 100644 index 000000000000..5980f7d256b1 --- /dev/null +++ b/tools/perf/util/genelf_debug.c @@ -0,0 +1,610 @@ +/* + * genelf_debug.c + * Copyright (C) 2015, Google, Inc + * + * Contributed by: + * Stephane Eranian <eranian@google.com> + * + * Released under the GPL v2. + * + * based on GPLv2 source code from Oprofile + * @remark Copyright 2007 OProfile authors + * @author Philippe Elie + */ +#include <sys/types.h> +#include <stdio.h> +#include <getopt.h> +#include <stddef.h> +#include <libelf.h> +#include <string.h> +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> +#include <fcntl.h> +#include <err.h> +#include <dwarf.h> + +#include "perf.h" +#include "genelf.h" +#include "../util/jitdump.h" + +#define BUFFER_EXT_DFL_SIZE (4 * 1024) + +typedef uint32_t uword; +typedef uint16_t uhalf; +typedef int32_t sword; +typedef int16_t shalf; +typedef uint8_t ubyte; +typedef int8_t sbyte; + +struct buffer_ext { + size_t cur_pos; + size_t max_sz; + void *data; +}; + +static void +buffer_ext_dump(struct buffer_ext *be, const char *msg) +{ + size_t i; + warnx("DUMP for %s", msg); + for (i = 0 ; i < be->cur_pos; i++) + warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff); +} + +static inline int +buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz) +{ + void *tmp; + size_t be_sz = be->max_sz; + +retry: + if ((be->cur_pos + sz) < be_sz) { + memcpy(be->data + be->cur_pos, addr, sz); + be->cur_pos += sz; + return 0; + } + + if (!be_sz) + be_sz = BUFFER_EXT_DFL_SIZE; + else + be_sz <<= 1; + + tmp = realloc(be->data, be_sz); + if (!tmp) + return -1; + + be->data = tmp; + be->max_sz = be_sz; + + goto retry; +} + +static void +buffer_ext_init(struct buffer_ext *be) +{ + be->data = NULL; + be->cur_pos = 0; + be->max_sz = 0; +} + +static inline size_t +buffer_ext_size(struct buffer_ext *be) +{ + return be->cur_pos; +} + +static inline void * +buffer_ext_addr(struct buffer_ext *be) +{ + return be->data; +} + +struct debug_line_header { + // Not counting this field + uword total_length; + // version number (2 currently) + uhalf version; + // relative offset from next field to + // program statement + uword prolog_length; + ubyte minimum_instruction_length; + ubyte default_is_stmt; + // line_base - see DWARF 2 specs + sbyte line_base; + // line_range - see DWARF 2 specs + ubyte line_range; + // number of opcode + 1 + ubyte opcode_base; + /* follow the array of opcode args nr: ubytes [nr_opcode_base] */ + /* follow the search directories index, zero terminated string + * terminated by an empty string. + */ + /* follow an array of { filename, LEB128, LEB128, LEB128 }, first is + * the directory index entry, 0 means current directory, then mtime + * and filesize, last entry is followed by en empty string. + */ + /* follow the first program statement */ +} __attribute__((packed)); + +/* DWARF 2 spec talk only about one possible compilation unit header while + * binutils can handle two flavours of dwarf 2, 32 and 64 bits, this is not + * related to the used arch, an ELF 32 can hold more than 4 Go of debug + * information. For now we handle only DWARF 2 32 bits comp unit. It'll only + * become a problem if we generate more than 4GB of debug information. + */ +struct compilation_unit_header { + uword total_length; + uhalf version; + uword debug_abbrev_offset; + ubyte pointer_size; +} __attribute__((packed)); + +#define DW_LNS_num_opcode (DW_LNS_set_isa + 1) + +/* field filled at run time are marked with -1 */ +static struct debug_line_header const default_debug_line_header = { + .total_length = -1, + .version = 2, + .prolog_length = -1, + .minimum_instruction_length = 1, /* could be better when min instruction size != 1 */ + .default_is_stmt = 1, /* we don't take care about basic block */ + .line_base = -5, /* sensible value for line base ... */ + .line_range = -14, /* ... and line range are guessed statically */ + .opcode_base = DW_LNS_num_opcode +}; + +static ubyte standard_opcode_length[] = +{ + 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 +}; +#if 0 +{ + [DW_LNS_advance_pc] = 1, + [DW_LNS_advance_line] = 1, + [DW_LNS_set_file] = 1, + [DW_LNS_set_column] = 1, + [DW_LNS_fixed_advance_pc] = 1, + [DW_LNS_set_isa] = 1, +}; +#endif + +/* field filled at run time are marked with -1 */ +static struct compilation_unit_header default_comp_unit_header = { + .total_length = -1, + .version = 2, + .debug_abbrev_offset = 0, /* we reuse the same abbrev entries for all comp unit */ + .pointer_size = sizeof(void *) +}; + +static void emit_uword(struct buffer_ext *be, uword data) +{ + buffer_ext_add(be, &data, sizeof(uword)); +} + +static void emit_string(struct buffer_ext *be, const char *s) +{ + buffer_ext_add(be, (void *)s, strlen(s) + 1); +} + +static void emit_unsigned_LEB128(struct buffer_ext *be, + unsigned long data) +{ + do { + ubyte cur = data & 0x7F; + data >>= 7; + if (data) + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } while (data); +} + +static void emit_signed_LEB128(struct buffer_ext *be, long data) +{ + int more = 1; + int negative = data < 0; + int size = sizeof(long) * CHAR_BIT; + while (more) { + ubyte cur = data & 0x7F; + data >>= 7; + if (negative) + data |= - (1 << (size - 7)); + if ((data == 0 && !(cur & 0x40)) || + (data == -1l && (cur & 0x40))) + more = 0; + else + cur |= 0x80; + buffer_ext_add(be, &cur, 1); + } +} + +static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode, + void *data, size_t data_len) +{ + buffer_ext_add(be, (char *)"", 1); + + emit_unsigned_LEB128(be, data_len + 1); + + buffer_ext_add(be, &opcode, 1); + buffer_ext_add(be, data, data_len); +} + +static void emit_opcode(struct buffer_ext *be, ubyte opcode) +{ + buffer_ext_add(be, &opcode, 1); +} + +static void emit_opcode_signed(struct buffer_ext *be, + ubyte opcode, long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_signed_LEB128(be, data); +} + +static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode, + unsigned long data) +{ + buffer_ext_add(be, &opcode, 1); + emit_unsigned_LEB128(be, data); +} + +static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc) +{ + emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc); +} + +static void emit_advance_lineno(struct buffer_ext *be, long delta_lineno) +{ + emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno); +} + +static void emit_lne_end_of_sequence(struct buffer_ext *be) +{ + emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0); +} + +static void emit_set_file(struct buffer_ext *be, unsigned long idx) +{ + emit_opcode_unsigned(be, DW_LNS_set_file, idx); +} + +static void emit_lne_define_filename(struct buffer_ext *be, + const char *filename) +{ + buffer_ext_add(be, (void *)"", 1); + + /* LNE field, strlen(filename) + zero termination, 3 bytes for: the dir entry, timestamp, filesize */ + emit_unsigned_LEB128(be, strlen(filename) + 5); + emit_opcode(be, DW_LNE_define_file); + emit_string(be, filename); + /* directory index 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* last modification date on file 0=do not know */ + emit_unsigned_LEB128(be, 0); + /* filesize 0=do not know */ + emit_unsigned_LEB128(be, 0); +} + +static void emit_lne_set_address(struct buffer_ext *be, + void *address) +{ + emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long)); +} + +static ubyte get_special_opcode(struct debug_entry *ent, + unsigned int last_line, + unsigned long last_vma) +{ + unsigned int temp; + unsigned long delta_addr; + + /* + * delta from line_base + */ + temp = (ent->lineno - last_line) - default_debug_line_header.line_base; + + if (temp >= default_debug_line_header.line_range) + return 0; + + /* + * delta of addresses + */ + delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length; + + /* This is not sufficient to ensure opcode will be in [0-256] but + * sufficient to ensure when summing with the delta lineno we will + * not overflow the unsigned long opcode */ + + if (delta_addr <= 256 / default_debug_line_header.line_range) { + unsigned long opcode = temp + + (delta_addr * default_debug_line_header.line_range) + + default_debug_line_header.opcode_base; + + return opcode <= 255 ? opcode : 0; + } + return 0; +} + +static void emit_lineno_info(struct buffer_ext *be, + struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + size_t i; + + /* + * Machine state at start of a statement program + * address = 0 + * file = 1 + * line = 1 + * column = 0 + * is_stmt = default_is_stmt as given in the debug_line_header + * basic block = 0 + * end sequence = 0 + */ + + /* start state of the state machine we take care of */ + unsigned long last_vma = code_addr; + char const *cur_filename = NULL; + unsigned long cur_file_idx = 0; + int last_line = 1; + + emit_lne_set_address(be, (void *)code_addr); + + for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) { + int need_copy = 0; + ubyte special_opcode; + + /* + * check if filename changed, if so add it + */ + if (!cur_filename || strcmp(cur_filename, ent->name)) { + emit_lne_define_filename(be, ent->name); + cur_filename = ent->name; + emit_set_file(be, ++cur_file_idx); + need_copy = 1; + } + + special_opcode = get_special_opcode(ent, last_line, last_vma); + if (special_opcode != 0) { + last_line = ent->lineno; + last_vma = ent->addr; + emit_opcode(be, special_opcode); + } else { + /* + * lines differ, emit line delta + */ + if (last_line != ent->lineno) { + emit_advance_lineno(be, ent->lineno - last_line); + last_line = ent->lineno; + need_copy = 1; + } + /* + * addresses differ, emit address delta + */ + if (last_vma != ent->addr) { + emit_advance_pc(be, ent->addr - last_vma); + last_vma = ent->addr; + need_copy = 1; + } + /* + * add new row to matrix + */ + if (need_copy) + emit_opcode(be, DW_LNS_copy); + } + } +} + +static void add_debug_line(struct buffer_ext *be, + struct debug_entry *ent, size_t nr_entry, + unsigned long code_addr) +{ + struct debug_line_header * dbg_header; + size_t old_size; + + old_size = buffer_ext_size(be); + + buffer_ext_add(be, (void *)&default_debug_line_header, + sizeof(default_debug_line_header)); + + buffer_ext_add(be, &standard_opcode_length, sizeof(standard_opcode_length)); + + // empty directory entry + buffer_ext_add(be, (void *)"", 1); + + // empty filename directory + buffer_ext_add(be, (void *)"", 1); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->prolog_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, minimum_instruction_length); + + emit_lineno_info(be, ent, nr_entry, code_addr); + + emit_lne_end_of_sequence(be); + + dbg_header = buffer_ext_addr(be) + old_size; + dbg_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct debug_line_header, version); +} + +static void +add_debug_abbrev(struct buffer_ext *be) +{ + emit_unsigned_LEB128(be, 1); + emit_unsigned_LEB128(be, DW_TAG_compile_unit); + emit_unsigned_LEB128(be, DW_CHILDREN_yes); + emit_unsigned_LEB128(be, DW_AT_stmt_list); + emit_unsigned_LEB128(be, DW_FORM_data4); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); + emit_unsigned_LEB128(be, 0); +} + +static void +add_compilation_unit(struct buffer_ext *be, + size_t offset_debug_line) +{ + struct compilation_unit_header *comp_unit_header; + size_t old_size = buffer_ext_size(be); + + buffer_ext_add(be, &default_comp_unit_header, + sizeof(default_comp_unit_header)); + + emit_unsigned_LEB128(be, 1); + emit_uword(be, offset_debug_line); + + comp_unit_header = buffer_ext_addr(be) + old_size; + comp_unit_header->total_length = (buffer_ext_size(be) - old_size) - + offsetof(struct compilation_unit_header, version); +} + +static int +jit_process_debug_info(uint64_t code_addr, + void *debug, int nr_debug_entries, + struct buffer_ext *dl, + struct buffer_ext *da, + struct buffer_ext *di) +{ + struct debug_entry *ent = debug; + int i; + + for (i = 0; i < nr_debug_entries; i++) { + ent->addr = ent->addr - code_addr; + ent = debug_entry_next(ent); + } + add_compilation_unit(di, buffer_ext_size(dl)); + add_debug_line(dl, debug, nr_debug_entries, 0); + add_debug_abbrev(da); + if (0) buffer_ext_dump(da, "abbrev"); + + return 0; +} + +int +jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries) +{ + Elf_Data *d; + Elf_Scn *scn; + Elf_Shdr *shdr; + struct buffer_ext dl, di, da; + int ret; + + buffer_ext_init(&dl); + buffer_ext_init(&di); + buffer_ext_init(&da); + + ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di); + if (ret) + return -1; + /* + * setup .debug_line section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&dl); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&dl); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 52; /* .debug_line */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_info section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&di); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&di); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 64; /* .debug_info */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * setup .debug_abbrev section + */ + scn = elf_newscn(e); + if (!scn) { + warnx("cannot create section"); + return -1; + } + + d = elf_newdata(scn); + if (!d) { + warnx("cannot get new data"); + return -1; + } + + d->d_align = 1; + d->d_off = 0LL; + d->d_buf = buffer_ext_addr(&da); + d->d_type = ELF_T_BYTE; + d->d_size = buffer_ext_size(&da); + d->d_version = EV_CURRENT; + + shdr = elf_getshdr(scn); + if (!shdr) { + warnx("cannot get section header"); + return -1; + } + + shdr->sh_name = 76; /* .debug_info */ + shdr->sh_type = SHT_PROGBITS; + shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */ + shdr->sh_flags = 0; + shdr->sh_entsize = 0; + + /* + * now we update the ELF image with all the sections + */ + if (elf_update(e, ELF_C_WRITE) < 0) { + warnx("elf_update debug failed"); + return -1; + } + return 0; +} diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 36a885d2cd22..0ac2037c970c 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -36,4 +36,19 @@ do }' "Documentation/perf-$cmd.txt" done echo "#endif /* HAVE_LIBELF_SUPPORT */" + +echo "#ifdef HAVE_LIBAUDIT_SUPPORT" +sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | +sort | +while read cmd +do + sed -n ' + /^NAME/,/perf-'"$cmd"'/H + ${ + x + s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ + p + }' "Documentation/perf-$cmd.txt" +done +echo "#endif /* HAVE_LIBELF_SUPPORT */" echo "};" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 43838003c1a1..73e38e472ecd 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -23,6 +23,8 @@ #include "strbuf.h" #include "build-id.h" #include "data.h" +#include <api/fs/fs.h> +#include "asm/bug.h" /* * magic2 = "PERFILE2" @@ -724,7 +726,7 @@ static int write_numa_topology(int fd, struct perf_header *h __maybe_unused, done: free(buf); fclose(fp); - free(node_map); + cpu_map__put(node_map); return ret; } @@ -868,6 +870,206 @@ static int write_auxtrace(int fd, struct perf_header *h, return err; } +static int cpu_cache_level__sort(const void *a, const void *b) +{ + struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; + struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b; + + return cache_a->level - cache_b->level; +} + +static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b) +{ + if (a->level != b->level) + return false; + + if (a->line_size != b->line_size) + return false; + + if (a->sets != b->sets) + return false; + + if (a->ways != b->ways) + return false; + + if (strcmp(a->type, b->type)) + return false; + + if (strcmp(a->size, b->size)) + return false; + + if (strcmp(a->map, b->map)) + return false; + + return true; +} + +static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level) +{ + char path[PATH_MAX], file[PATH_MAX]; + struct stat st; + size_t len; + + scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level); + scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path); + + if (stat(file, &st)) + return 1; + + scnprintf(file, PATH_MAX, "%s/level", path); + if (sysfs__read_int(file, (int *) &cache->level)) + return -1; + + scnprintf(file, PATH_MAX, "%s/coherency_line_size", path); + if (sysfs__read_int(file, (int *) &cache->line_size)) + return -1; + + scnprintf(file, PATH_MAX, "%s/number_of_sets", path); + if (sysfs__read_int(file, (int *) &cache->sets)) + return -1; + + scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path); + if (sysfs__read_int(file, (int *) &cache->ways)) + return -1; + + scnprintf(file, PATH_MAX, "%s/type", path); + if (sysfs__read_str(file, &cache->type, &len)) + return -1; + + cache->type[len] = 0; + cache->type = rtrim(cache->type); + + scnprintf(file, PATH_MAX, "%s/size", path); + if (sysfs__read_str(file, &cache->size, &len)) { + free(cache->type); + return -1; + } + + cache->size[len] = 0; + cache->size = rtrim(cache->size); + + scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); + if (sysfs__read_str(file, &cache->map, &len)) { + free(cache->map); + free(cache->type); + return -1; + } + + cache->map[len] = 0; + cache->map = rtrim(cache->map); + return 0; +} + +static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c) +{ + fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map); +} + +static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp) +{ + u32 i, cnt = 0; + long ncpus; + u32 nr, cpu; + u16 level; + + ncpus = sysconf(_SC_NPROCESSORS_CONF); + if (ncpus < 0) + return -1; + + nr = (u32)(ncpus & UINT_MAX); + + for (cpu = 0; cpu < nr; cpu++) { + for (level = 0; level < 10; level++) { + struct cpu_cache_level c; + int err; + + err = cpu_cache_level__read(&c, cpu, level); + if (err < 0) + return err; + + if (err == 1) + break; + + for (i = 0; i < cnt; i++) { + if (cpu_cache_level__cmp(&c, &caches[i])) + break; + } + + if (i == cnt) + caches[cnt++] = c; + else + cpu_cache_level__free(&c); + + if (WARN_ONCE(cnt == size, "way too many cpu caches..")) + goto out; + } + } + out: + *cntp = cnt; + return 0; +} + +#define MAX_CACHES 2000 + +static int write_cache(int fd, struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + struct cpu_cache_level caches[MAX_CACHES]; + u32 cnt = 0, i, version = 1; + int ret; + + ret = build_caches(caches, MAX_CACHES, &cnt); + if (ret) + goto out; + + qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort); + + ret = do_write(fd, &version, sizeof(u32)); + if (ret < 0) + goto out; + + ret = do_write(fd, &cnt, sizeof(u32)); + if (ret < 0) + goto out; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level *c = &caches[i]; + + #define _W(v) \ + ret = do_write(fd, &c->v, sizeof(u32)); \ + if (ret < 0) \ + goto out; + + _W(level) + _W(line_size) + _W(sets) + _W(ways) + #undef _W + + #define _W(v) \ + ret = do_write_string(fd, (const char *) c->v); \ + if (ret < 0) \ + goto out; + + _W(type) + _W(size) + _W(map) + #undef _W + } + +out: + for (i = 0; i < cnt; i++) + cpu_cache_level__free(&caches[i]); + return ret; +} + +static int write_stat(int fd __maybe_unused, + struct perf_header *h __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} + static void print_hostname(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1159,6 +1361,24 @@ static void print_auxtrace(struct perf_header *ph __maybe_unused, fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n"); } +static void print_stat(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp) +{ + fprintf(fp, "# contains stat data\n"); +} + +static void print_cache(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp __maybe_unused) +{ + int i; + + fprintf(fp, "# CPU cache info:\n"); + for (i = 0; i < ph->env.caches_cnt; i++) { + fprintf(fp, "# "); + cpu_cache_level__fprintf(fp, &ph->env.caches[i]); + } +} + static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1907,6 +2127,68 @@ static int process_auxtrace(struct perf_file_section *section, return err; } +static int process_cache(struct perf_file_section *section __maybe_unused, + struct perf_header *ph __maybe_unused, int fd __maybe_unused, + void *data __maybe_unused) +{ + struct cpu_cache_level *caches; + u32 cnt, i, version; + + if (readn(fd, &version, sizeof(version)) != sizeof(version)) + return -1; + + if (ph->needs_swap) + version = bswap_32(version); + + if (version != 1) + return -1; + + if (readn(fd, &cnt, sizeof(cnt)) != sizeof(cnt)) + return -1; + + if (ph->needs_swap) + cnt = bswap_32(cnt); + + caches = zalloc(sizeof(*caches) * cnt); + if (!caches) + return -1; + + for (i = 0; i < cnt; i++) { + struct cpu_cache_level c; + + #define _R(v) \ + if (readn(fd, &c.v, sizeof(u32)) != sizeof(u32))\ + goto out_free_caches; \ + if (ph->needs_swap) \ + c.v = bswap_32(c.v); \ + + _R(level) + _R(line_size) + _R(sets) + _R(ways) + #undef _R + + #define _R(v) \ + c.v = do_read_string(fd, ph); \ + if (!c.v) \ + goto out_free_caches; + + _R(type) + _R(size) + _R(map) + #undef _R + + caches[i] = c; + } + + ph->env.caches = caches; + ph->env.caches_cnt = cnt; + return 0; +out_free_caches: + free(caches); + return -1; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1948,6 +2230,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), FEAT_OPP(HEADER_GROUP_DESC, group_desc), FEAT_OPP(HEADER_AUXTRACE, auxtrace), + FEAT_OPA(HEADER_STAT, stat), + FEAT_OPF(HEADER_CACHE, cache), }; struct header_print_data { @@ -2686,6 +2970,152 @@ int perf_event__synthesize_attr(struct perf_tool *tool, return err; } +static struct event_update_event * +event_update_event__new(size_t size, u64 type, u64 id) +{ + struct event_update_event *ev; + + size += sizeof(*ev); + size = PERF_ALIGN(size, sizeof(u64)); + + ev = zalloc(size); + if (ev) { + ev->header.type = PERF_RECORD_EVENT_UPDATE; + ev->header.size = (u16)size; + ev->type = type; + ev->id = id; + } + return ev; +} + +int +perf_event__synthesize_event_update_unit(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process) +{ + struct event_update_event *ev; + size_t size = strlen(evsel->unit); + int err; + + ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]); + if (ev == NULL) + return -ENOMEM; + + strncpy(ev->data, evsel->unit, size); + err = process(tool, (union perf_event *)ev, NULL, NULL); + free(ev); + return err; +} + +int +perf_event__synthesize_event_update_scale(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process) +{ + struct event_update_event *ev; + struct event_update_event_scale *ev_data; + int err; + + ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]); + if (ev == NULL) + return -ENOMEM; + + ev_data = (struct event_update_event_scale *) ev->data; + ev_data->scale = evsel->scale; + err = process(tool, (union perf_event*) ev, NULL, NULL); + free(ev); + return err; +} + +int +perf_event__synthesize_event_update_name(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process) +{ + struct event_update_event *ev; + size_t len = strlen(evsel->name); + int err; + + ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]); + if (ev == NULL) + return -ENOMEM; + + strncpy(ev->data, evsel->name, len); + err = process(tool, (union perf_event*) ev, NULL, NULL); + free(ev); + return err; +} + +int +perf_event__synthesize_event_update_cpus(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process) +{ + size_t size = sizeof(struct event_update_event); + struct event_update_event *ev; + int max, err; + u16 type; + + if (!evsel->own_cpus) + return 0; + + ev = cpu_map_data__alloc(evsel->own_cpus, &size, &type, &max); + if (!ev) + return -ENOMEM; + + ev->header.type = PERF_RECORD_EVENT_UPDATE; + ev->header.size = (u16)size; + ev->type = PERF_EVENT_UPDATE__CPUS; + ev->id = evsel->id[0]; + + cpu_map_data__synthesize((struct cpu_map_data *) ev->data, + evsel->own_cpus, + type, max); + + err = process(tool, (union perf_event*) ev, NULL, NULL); + free(ev); + return err; +} + +size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) +{ + struct event_update_event *ev = &event->event_update; + struct event_update_event_scale *ev_scale; + struct event_update_event_cpus *ev_cpus; + struct cpu_map *map; + size_t ret; + + ret = fprintf(fp, "\n... id: %" PRIu64 "\n", ev->id); + + switch (ev->type) { + case PERF_EVENT_UPDATE__SCALE: + ev_scale = (struct event_update_event_scale *) ev->data; + ret += fprintf(fp, "... scale: %f\n", ev_scale->scale); + break; + case PERF_EVENT_UPDATE__UNIT: + ret += fprintf(fp, "... unit: %s\n", ev->data); + break; + case PERF_EVENT_UPDATE__NAME: + ret += fprintf(fp, "... name: %s\n", ev->data); + break; + case PERF_EVENT_UPDATE__CPUS: + ev_cpus = (struct event_update_event_cpus *) ev->data; + ret += fprintf(fp, "... "); + + map = cpu_map__new_data(&ev_cpus->cpus); + if (map) + ret += cpu_map__fprintf(map, fp); + else + ret += fprintf(fp, "failed to get cpus\n"); + break; + default: + ret += fprintf(fp, "... unknown type\n"); + break; + } + + return ret; +} + int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process) @@ -2745,6 +3175,51 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, return 0; } +int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_evlist **pevlist) +{ + struct event_update_event *ev = &event->event_update; + struct event_update_event_scale *ev_scale; + struct event_update_event_cpus *ev_cpus; + struct perf_evlist *evlist; + struct perf_evsel *evsel; + struct cpu_map *map; + + if (!pevlist || *pevlist == NULL) + return -EINVAL; + + evlist = *pevlist; + + evsel = perf_evlist__id2evsel(evlist, ev->id); + if (evsel == NULL) + return -EINVAL; + + switch (ev->type) { + case PERF_EVENT_UPDATE__UNIT: + evsel->unit = strdup(ev->data); + break; + case PERF_EVENT_UPDATE__NAME: + evsel->name = strdup(ev->data); + break; + case PERF_EVENT_UPDATE__SCALE: + ev_scale = (struct event_update_event_scale *) ev->data; + evsel->scale = ev_scale->scale; + case PERF_EVENT_UPDATE__CPUS: + ev_cpus = (struct event_update_event_cpus *) ev->data; + + map = cpu_map__new_data(&ev_cpus->cpus); + if (map) + evsel->own_cpus = map; + else + pr_err("failed to get event_update cpus\n"); + default: + break; + } + + return 0; +} + int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct perf_evlist *evlist, perf_event__handler_t process) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 05f27cb6b7e3..3d87ca823c0a 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -31,6 +31,8 @@ enum { HEADER_PMU_MAPPINGS, HEADER_GROUP_DESC, HEADER_AUXTRACE, + HEADER_STAT, + HEADER_CACHE, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -105,8 +107,24 @@ int perf_event__synthesize_attr(struct perf_tool *tool, int perf_event__synthesize_attrs(struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process); +int perf_event__synthesize_event_update_unit(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_event_update_scale(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_event_update_name(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process); +int perf_event__synthesize_event_update_cpus(struct perf_tool *tool, + struct perf_evsel *evsel, + perf_event__handler_t process); int perf_event__process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); +int perf_event__process_event_update(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_evlist **pevlist); +size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct perf_evlist *evlist, diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c new file mode 100644 index 000000000000..43a98a4dc1e1 --- /dev/null +++ b/tools/perf/util/help-unknown-cmd.c @@ -0,0 +1,104 @@ +#include "cache.h" +#include <subcmd/help.h> +#include "../builtin.h" +#include "levenshtein.h" + +static int autocorrect; +static struct cmdnames aliases; + +static int perf_unknown_cmd_config(const char *var, const char *value, + void *cb __maybe_unused) +{ + if (!strcmp(var, "help.autocorrect")) + autocorrect = perf_config_int(var,value); + /* Also use aliases for command lookup */ + if (!prefixcmp(var, "alias.")) + add_cmdname(&aliases, var + 6, strlen(var + 6)); + + return 0; +} + +static int levenshtein_compare(const void *p1, const void *p2) +{ + const struct cmdname *const *c1 = p1, *const *c2 = p2; + const char *s1 = (*c1)->name, *s2 = (*c2)->name; + int l1 = (*c1)->len; + int l2 = (*c2)->len; + return l1 != l2 ? l1 - l2 : strcmp(s1, s2); +} + +static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) +{ + unsigned int i; + + ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); + + for (i = 0; i < old->cnt; i++) + cmds->names[cmds->cnt++] = old->names[i]; + zfree(&old->names); + old->cnt = 0; +} + +const char *help_unknown_cmd(const char *cmd) +{ + unsigned int i, n = 0, best_similarity = 0; + struct cmdnames main_cmds, other_cmds; + + memset(&main_cmds, 0, sizeof(main_cmds)); + memset(&other_cmds, 0, sizeof(main_cmds)); + memset(&aliases, 0, sizeof(aliases)); + + perf_config(perf_unknown_cmd_config, NULL); + + load_command_list("perf-", &main_cmds, &other_cmds); + + add_cmd_list(&main_cmds, &aliases); + add_cmd_list(&main_cmds, &other_cmds); + qsort(main_cmds.names, main_cmds.cnt, + sizeof(main_cmds.names), cmdname_compare); + uniq(&main_cmds); + + if (main_cmds.cnt) { + /* This reuses cmdname->len for similarity index */ + for (i = 0; i < main_cmds.cnt; ++i) + main_cmds.names[i]->len = + levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); + + qsort(main_cmds.names, main_cmds.cnt, + sizeof(*main_cmds.names), levenshtein_compare); + + best_similarity = main_cmds.names[0]->len; + n = 1; + while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) + ++n; + } + + if (autocorrect && n == 1) { + const char *assumed = main_cmds.names[0]->name; + + main_cmds.names[0] = NULL; + clean_cmdnames(&main_cmds); + fprintf(stderr, "WARNING: You called a perf program named '%s', " + "which does not exist.\n" + "Continuing under the assumption that you meant '%s'\n", + cmd, assumed); + if (autocorrect > 0) { + fprintf(stderr, "in %0.1f seconds automatically...\n", + (float)autocorrect/10.0); + poll(NULL, 0, autocorrect * 100); + } + return assumed; + } + + fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); + + if (main_cmds.cnt && best_similarity < 6) { + fprintf(stderr, "\nDid you mean %s?\n", + n < 2 ? "this": "one of these"); + + for (i = 0; i < n; i++) + fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); + } + + exit(1); +} diff --git a/tools/perf/util/help-unknown-cmd.h b/tools/perf/util/help-unknown-cmd.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/perf/util/help-unknown-cmd.h diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c deleted file mode 100644 index 86c37c472263..000000000000 --- a/tools/perf/util/help.c +++ /dev/null @@ -1,339 +0,0 @@ -#include "cache.h" -#include "../builtin.h" -#include "exec_cmd.h" -#include "levenshtein.h" -#include "help.h" -#include <termios.h> - -void add_cmdname(struct cmdnames *cmds, const char *name, size_t len) -{ - struct cmdname *ent = malloc(sizeof(*ent) + len + 1); - - ent->len = len; - memcpy(ent->name, name, len); - ent->name[len] = 0; - - ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); - cmds->names[cmds->cnt++] = ent; -} - -static void clean_cmdnames(struct cmdnames *cmds) -{ - unsigned int i; - - for (i = 0; i < cmds->cnt; ++i) - zfree(&cmds->names[i]); - zfree(&cmds->names); - cmds->cnt = 0; - cmds->alloc = 0; -} - -static int cmdname_compare(const void *a_, const void *b_) -{ - struct cmdname *a = *(struct cmdname **)a_; - struct cmdname *b = *(struct cmdname **)b_; - return strcmp(a->name, b->name); -} - -static void uniq(struct cmdnames *cmds) -{ - unsigned int i, j; - - if (!cmds->cnt) - return; - - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - - cmds->cnt = j; -} - -void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) -{ - size_t ci, cj, ei; - int cmp; - - ci = cj = ei = 0; - while (ci < cmds->cnt && ei < excludes->cnt) { - cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); - if (cmp < 0) - cmds->names[cj++] = cmds->names[ci++]; - else if (cmp == 0) - ci++, ei++; - else if (cmp > 0) - ei++; - } - - while (ci < cmds->cnt) - cmds->names[cj++] = cmds->names[ci++]; - - cmds->cnt = cj; -} - -static void pretty_print_string_list(struct cmdnames *cmds, int longest) -{ - int cols = 1, rows; - int space = longest + 1; /* min 1 SP between words */ - struct winsize win; - int max_cols; - int i, j; - - get_term_dimensions(&win); - max_cols = win.ws_col - 1; /* don't print *on* the edge */ - - if (space < max_cols) - cols = max_cols / space; - rows = (cmds->cnt + cols - 1) / cols; - - for (i = 0; i < rows; i++) { - printf(" "); - - for (j = 0; j < cols; j++) { - unsigned int n = j * rows + i; - unsigned int size = space; - - if (n >= cmds->cnt) - break; - if (j == cols-1 || n + rows >= cmds->cnt) - size = 1; - printf("%-*s", size, cmds->names[n]->name); - } - putchar('\n'); - } -} - -static int is_executable(const char *name) -{ - struct stat st; - - if (stat(name, &st) || /* stat, not lstat */ - !S_ISREG(st.st_mode)) - return 0; - - return st.st_mode & S_IXUSR; -} - -static void list_commands_in_dir(struct cmdnames *cmds, - const char *path, - const char *prefix) -{ - int prefix_len; - DIR *dir = opendir(path); - struct dirent *de; - struct strbuf buf = STRBUF_INIT; - int len; - - if (!dir) - return; - if (!prefix) - prefix = "perf-"; - prefix_len = strlen(prefix); - - strbuf_addf(&buf, "%s/", path); - len = buf.len; - - while ((de = readdir(dir)) != NULL) { - int entlen; - - if (prefixcmp(de->d_name, prefix)) - continue; - - strbuf_setlen(&buf, len); - strbuf_addstr(&buf, de->d_name); - if (!is_executable(buf.buf)) - continue; - - entlen = strlen(de->d_name) - prefix_len; - if (has_extension(de->d_name, ".exe")) - entlen -= 4; - - add_cmdname(cmds, de->d_name + prefix_len, entlen); - } - closedir(dir); - strbuf_release(&buf); -} - -void load_command_list(const char *prefix, - struct cmdnames *main_cmds, - struct cmdnames *other_cmds) -{ - const char *env_path = getenv("PATH"); - const char *exec_path = perf_exec_path(); - - if (exec_path) { - list_commands_in_dir(main_cmds, exec_path, prefix); - qsort(main_cmds->names, main_cmds->cnt, - sizeof(*main_cmds->names), cmdname_compare); - uniq(main_cmds); - } - - if (env_path) { - char *paths, *path, *colon; - path = paths = strdup(env_path); - while (1) { - if ((colon = strchr(path, PATH_SEP))) - *colon = 0; - if (!exec_path || strcmp(path, exec_path)) - list_commands_in_dir(other_cmds, path, prefix); - - if (!colon) - break; - path = colon + 1; - } - free(paths); - - qsort(other_cmds->names, other_cmds->cnt, - sizeof(*other_cmds->names), cmdname_compare); - uniq(other_cmds); - } - exclude_cmds(other_cmds, main_cmds); -} - -void list_commands(const char *title, struct cmdnames *main_cmds, - struct cmdnames *other_cmds) -{ - unsigned int i, longest = 0; - - for (i = 0; i < main_cmds->cnt; i++) - if (longest < main_cmds->names[i]->len) - longest = main_cmds->names[i]->len; - for (i = 0; i < other_cmds->cnt; i++) - if (longest < other_cmds->names[i]->len) - longest = other_cmds->names[i]->len; - - if (main_cmds->cnt) { - const char *exec_path = perf_exec_path(); - printf("available %s in '%s'\n", title, exec_path); - printf("----------------"); - mput_char('-', strlen(title) + strlen(exec_path)); - putchar('\n'); - pretty_print_string_list(main_cmds, longest); - putchar('\n'); - } - - if (other_cmds->cnt) { - printf("%s available from elsewhere on your $PATH\n", title); - printf("---------------------------------------"); - mput_char('-', strlen(title)); - putchar('\n'); - pretty_print_string_list(other_cmds, longest); - putchar('\n'); - } -} - -int is_in_cmdlist(struct cmdnames *c, const char *s) -{ - unsigned int i; - - for (i = 0; i < c->cnt; i++) - if (!strcmp(s, c->names[i]->name)) - return 1; - return 0; -} - -static int autocorrect; -static struct cmdnames aliases; - -static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) -{ - if (!strcmp(var, "help.autocorrect")) - autocorrect = perf_config_int(var,value); - /* Also use aliases for command lookup */ - if (!prefixcmp(var, "alias.")) - add_cmdname(&aliases, var + 6, strlen(var + 6)); - - return perf_default_config(var, value, cb); -} - -static int levenshtein_compare(const void *p1, const void *p2) -{ - const struct cmdname *const *c1 = p1, *const *c2 = p2; - const char *s1 = (*c1)->name, *s2 = (*c2)->name; - int l1 = (*c1)->len; - int l2 = (*c2)->len; - return l1 != l2 ? l1 - l2 : strcmp(s1, s2); -} - -static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) -{ - unsigned int i; - - ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); - - for (i = 0; i < old->cnt; i++) - cmds->names[cmds->cnt++] = old->names[i]; - zfree(&old->names); - old->cnt = 0; -} - -const char *help_unknown_cmd(const char *cmd) -{ - unsigned int i, n = 0, best_similarity = 0; - struct cmdnames main_cmds, other_cmds; - - memset(&main_cmds, 0, sizeof(main_cmds)); - memset(&other_cmds, 0, sizeof(main_cmds)); - memset(&aliases, 0, sizeof(aliases)); - - perf_config(perf_unknown_cmd_config, NULL); - - load_command_list("perf-", &main_cmds, &other_cmds); - - add_cmd_list(&main_cmds, &aliases); - add_cmd_list(&main_cmds, &other_cmds); - qsort(main_cmds.names, main_cmds.cnt, - sizeof(main_cmds.names), cmdname_compare); - uniq(&main_cmds); - - if (main_cmds.cnt) { - /* This reuses cmdname->len for similarity index */ - for (i = 0; i < main_cmds.cnt; ++i) - main_cmds.names[i]->len = - levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); - - qsort(main_cmds.names, main_cmds.cnt, - sizeof(*main_cmds.names), levenshtein_compare); - - best_similarity = main_cmds.names[0]->len; - n = 1; - while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) - ++n; - } - - if (autocorrect && n == 1) { - const char *assumed = main_cmds.names[0]->name; - - main_cmds.names[0] = NULL; - clean_cmdnames(&main_cmds); - fprintf(stderr, "WARNING: You called a perf program named '%s', " - "which does not exist.\n" - "Continuing under the assumption that you meant '%s'\n", - cmd, assumed); - if (autocorrect > 0) { - fprintf(stderr, "in %0.1f seconds automatically...\n", - (float)autocorrect/10.0); - poll(NULL, 0, autocorrect * 100); - } - return assumed; - } - - fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); - - if (main_cmds.cnt && best_similarity < 6) { - fprintf(stderr, "\nDid you mean %s?\n", - n < 2 ? "this": "one of these"); - - for (i = 0; i < n; i++) - fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); - } - - exit(1); -} - -int cmd_version(int argc __maybe_unused, const char **argv __maybe_unused, - const char *prefix __maybe_unused) -{ - printf("perf version %s\n", perf_version_string); - return 0; -} diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h deleted file mode 100644 index 7f5c6dedd714..000000000000 --- a/tools/perf/util/help.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __PERF_HELP_H -#define __PERF_HELP_H - -struct cmdnames { - size_t alloc; - size_t cnt; - struct cmdname { - size_t len; /* also used for similarity index in help.c */ - char name[FLEX_ARRAY]; - } **names; -}; - -static inline void mput_char(char c, unsigned int num) -{ - while(num--) - putchar(c); -} - -void load_command_list(const char *prefix, - struct cmdnames *main_cmds, - struct cmdnames *other_cmds); -void add_cmdname(struct cmdnames *cmds, const char *name, size_t len); -/* Here we require that excludes is a sorted list. */ -void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); -int is_in_cmdlist(struct cmdnames *c, const char *s); -void list_commands(const char *title, struct cmdnames *main_cmds, - struct cmdnames *other_cmds); - -#endif /* __PERF_HELP_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 4fd37d6708cb..290b3cbf6877 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -131,6 +131,8 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); + hists__new_col_len(hists, HISTC_MEM_DCACHELINE, + symlen); } if (h->mem_info->iaddr.sym) { @@ -177,6 +179,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) if (h->transaction) hists__new_col_len(hists, HISTC_TRANSACTION, hist_entry__transaction_len()); + + if (h->trace_output) + hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output)); } void hists__output_recalc_col_len(struct hists *hists, int max_rows) @@ -243,6 +248,8 @@ static void he_stat__decay(struct he_stat *he_stat) /* XXX need decay for weight too? */ } +static void hists__delete_entry(struct hists *hists, struct hist_entry *he); + static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) { u64 prev_period = he->stat.period; @@ -254,22 +261,49 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) he_stat__decay(&he->stat); if (symbol_conf.cumulate_callchain) he_stat__decay(he->stat_acc); + decay_callchain(he->callchain); diff = prev_period - he->stat.period; - hists->stats.total_period -= diff; - if (!he->filtered) - hists->stats.total_non_filtered_period -= diff; + if (!he->depth) { + hists->stats.total_period -= diff; + if (!he->filtered) + hists->stats.total_non_filtered_period -= diff; + } + + if (!he->leaf) { + struct hist_entry *child; + struct rb_node *node = rb_first(&he->hroot_out); + while (node) { + child = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + if (hists__decay_entry(hists, child)) + hists__delete_entry(hists, child); + } + } return he->stat.period == 0; } static void hists__delete_entry(struct hists *hists, struct hist_entry *he) { - rb_erase(&he->rb_node, &hists->entries); + struct rb_root *root_in; + struct rb_root *root_out; - if (sort__need_collapse) - rb_erase(&he->rb_node_in, &hists->entries_collapsed); + if (he->parent_he) { + root_in = &he->parent_he->hroot_in; + root_out = &he->parent_he->hroot_out; + } else { + if (sort__need_collapse) + root_in = &hists->entries_collapsed; + else + root_in = hists->entries_in; + root_out = &hists->entries; + } + + rb_erase(&he->rb_node_in, root_in); + rb_erase(&he->rb_node, root_out); --hists->nr_entries; if (!he->filtered) @@ -367,8 +401,30 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, if (symbol_conf.use_callchain) callchain_init(he->callchain); + if (he->raw_data) { + he->raw_data = memdup(he->raw_data, he->raw_size); + + if (he->raw_data == NULL) { + map__put(he->ms.map); + if (he->branch_info) { + map__put(he->branch_info->from.map); + map__put(he->branch_info->to.map); + free(he->branch_info); + } + if (he->mem_info) { + map__put(he->mem_info->iaddr.map); + map__put(he->mem_info->daddr.map); + } + free(he->stat_acc); + free(he); + return NULL; + } + } INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); + + if (!symbol_conf.report_hierarchy) + he->leaf = true; } return he; @@ -381,6 +437,16 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } +static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) +{ + if (!symbol_conf.use_callchain) + return; + + he->hists->callchain_period += period; + if (!he->filtered) + he->hists->callchain_non_filtered_period += period; +} + static struct hist_entry *hists__findnew_entry(struct hists *hists, struct hist_entry *entry, struct addr_location *al, @@ -408,8 +474,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, cmp = hist_entry__cmp(he, entry); if (!cmp) { - if (sample_self) + if (sample_self) { he_stat__add_period(&he->stat, period, weight); + hist_entry__add_callchain_period(he, period); + } if (symbol_conf.cumulate_callchain) he_stat__add_period(he->stat_acc, period, weight); @@ -442,6 +510,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, if (!he) return NULL; + if (sample_self) + hist_entry__add_callchain_period(he, period); hists->nr_entries++; rb_link_node(&he->rb_node_in, parent, p); @@ -459,7 +529,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct symbol *sym_parent, struct branch_info *bi, struct mem_info *mi, - u64 period, u64 weight, u64 transaction, + struct perf_sample *sample, bool sample_self) { struct hist_entry entry = { @@ -476,15 +546,17 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .level = al->level, .stat = { .nr_events = 1, - .period = period, - .weight = weight, + .period = sample->period, + .weight = sample->weight, }, .parent = sym_parent, .filtered = symbol__parent_filter(sym_parent) | al->filtered, .hists = hists, .branch_info = bi, .mem_info = mi, - .transaction = transaction, + .transaction = sample->transaction, + .raw_data = sample->raw_data, + .raw_size = sample->raw_size, }; return hists__findnew_entry(hists, &entry, al, sample_self); @@ -524,12 +596,13 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al u64 cost; struct mem_info *mi = iter->priv; struct hists *hists = evsel__hists(iter->evsel); + struct perf_sample *sample = iter->sample; struct hist_entry *he; if (mi == NULL) return -EINVAL; - cost = iter->sample->weight; + cost = sample->weight; if (!cost) cost = 1; @@ -540,8 +613,10 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al * and this is indirectly achieved by passing period=weight here * and the he_stat__add_period() function. */ + sample->period = cost; + he = __hists__add_entry(hists, al, iter->parent, NULL, mi, - cost, cost, 0, true); + sample, true); if (!he) return -ENOMEM; @@ -628,6 +703,7 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a struct branch_info *bi; struct perf_evsel *evsel = iter->evsel; struct hists *hists = evsel__hists(evsel); + struct perf_sample *sample = iter->sample; struct hist_entry *he = NULL; int i = iter->curr; int err = 0; @@ -641,9 +717,11 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a * The report shows the percentage of total branches captured * and not events sampled. Thus we use a pseudo period of 1. */ + sample->period = 1; + sample->weight = bi->flags.cycles ? bi->flags.cycles : 1; + he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, - 1, bi->flags.cycles ? bi->flags.cycles : 1, - 0, true); + sample, true); if (he == NULL) return -ENOMEM; @@ -680,8 +758,7 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location struct hist_entry *he; he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, - sample->period, sample->weight, - sample->transaction, true); + sample, true); if (he == NULL) return -ENOMEM; @@ -742,8 +819,7 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter, int err = 0; he = __hists__add_entry(hists, al, iter->parent, NULL, NULL, - sample->period, sample->weight, - sample->transaction, true); + sample, true); if (he == NULL) return -ENOMEM; @@ -795,6 +871,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, .sym = al->sym, }, .parent = iter->parent, + .raw_data = sample->raw_data, + .raw_size = sample->raw_size, }; int i; struct callchain_cursor cursor; @@ -816,8 +894,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, } he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL, - sample->period, sample->weight, - sample->transaction, false); + sample, false); if (he == NULL) return -ENOMEM; @@ -920,11 +997,13 @@ out: int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) continue; cmp = fmt->cmp(fmt, left, right); @@ -938,11 +1017,13 @@ hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) { + struct hists *hists = left->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + !perf_hpp__defined_dynamic_entry(fmt, hists)) continue; cmp = fmt->collapse(fmt, left, right); @@ -975,22 +1056,256 @@ void hist_entry__delete(struct hist_entry *he) if (he->srcfile && he->srcfile[0]) free(he->srcfile); free_callchain(he->callchain); + free(he->trace_output); + free(he->raw_data); free(he); } /* + * If this is not the last column, then we need to pad it according to the + * pre-calculated max lenght for this column, otherwise don't bother adding + * spaces because that would break viewing this with, for instance, 'less', + * that would show tons of trailing spaces when a long C++ demangled method + * names is sampled. +*/ +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed) +{ + if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) { + const int width = fmt->width(fmt, hpp, hists_to_evsel(he->hists)); + if (printed < width) { + advance_hpp(hpp, printed); + printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " "); + } + } + + return printed; +} + +/* * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, +static void hists__apply_filters(struct hists *hists, struct hist_entry *he); +static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he, + enum hist_filter type); + +typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt); + +static bool check_thread_entry(struct perf_hpp_fmt *fmt) +{ + return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt); +} + +static void hist_entry__check_and_remove_filter(struct hist_entry *he, + enum hist_filter type, + fmt_chk_fn check) +{ + struct perf_hpp_fmt *fmt; + bool type_match = false; + struct hist_entry *parent = he->parent_he; + + switch (type) { + case HIST_FILTER__THREAD: + if (symbol_conf.comm_list == NULL && + symbol_conf.pid_list == NULL && + symbol_conf.tid_list == NULL) + return; + break; + case HIST_FILTER__DSO: + if (symbol_conf.dso_list == NULL) + return; + break; + case HIST_FILTER__SYMBOL: + if (symbol_conf.sym_list == NULL) + return; + break; + case HIST_FILTER__PARENT: + case HIST_FILTER__GUEST: + case HIST_FILTER__HOST: + case HIST_FILTER__SOCKET: + default: + return; + } + + /* if it's filtered by own fmt, it has to have filter bits */ + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (check(fmt)) { + type_match = true; + break; + } + } + + if (type_match) { + /* + * If the filter is for current level entry, propagate + * filter marker to parents. The marker bit was + * already set by default so it only needs to clear + * non-filtered entries. + */ + if (!(he->filtered & (1 << type))) { + while (parent) { + parent->filtered &= ~(1 << type); + parent = parent->parent_he; + } + } + } else { + /* + * If current entry doesn't have matching formats, set + * filter marker for upper level entries. it will be + * cleared if its lower level entries is not filtered. + * + * For lower-level entries, it inherits parent's + * filter bit so that lower level entries of a + * non-filtered entry won't set the filter marker. + */ + if (parent == NULL) + he->filtered |= (1 << type); + else + he->filtered |= (parent->filtered & (1 << type)); + } +} + +static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) +{ + hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD, + check_thread_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO, + perf_hpp__is_dso_entry); + + hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, + perf_hpp__is_sym_entry); + + hists__apply_filters(he->hists, he); +} + +static struct hist_entry *hierarchy_insert_entry(struct hists *hists, + struct rb_root *root, + struct hist_entry *he, + struct hist_entry *parent_he, + struct perf_hpp_list *hpp_list) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter, *new; + struct perf_hpp_fmt *fmt; + int64_t cmp; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node_in); + + cmp = 0; + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + cmp = fmt->collapse(fmt, iter, he); + if (cmp) + break; + } + + if (!cmp) { + he_stat__add_stat(&iter->stat, &he->stat); + return iter; + } + + if (cmp < 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + new = hist_entry__new(he, true); + if (new == NULL) + return NULL; + + hists->nr_entries++; + + /* save related format list for output */ + new->hpp_list = hpp_list; + new->parent_he = parent_he; + + hist_entry__apply_hierarchy_filters(new); + + /* some fields are now passed to 'new' */ + perf_hpp_list__for_each_sort_list(hpp_list, fmt) { + if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt)) + he->trace_output = NULL; + else + new->trace_output = NULL; + + if (perf_hpp__is_srcline_entry(fmt)) + he->srcline = NULL; + else + new->srcline = NULL; + + if (perf_hpp__is_srcfile_entry(fmt)) + he->srcfile = NULL; + else + new->srcfile = NULL; + } + + rb_link_node(&new->rb_node_in, parent, p); + rb_insert_color(&new->rb_node_in, root); + return new; +} + +static int hists__hierarchy_insert_entry(struct hists *hists, struct rb_root *root, struct hist_entry *he) { + struct perf_hpp_list_node *node; + struct hist_entry *new_he = NULL; + struct hist_entry *parent = NULL; + int depth = 0; + int ret = 0; + + list_for_each_entry(node, &hists->hpp_formats, list) { + /* skip period (overhead) and elided columns */ + if (node->level == 0 || node->skip) + continue; + + /* insert copy of 'he' for each fmt into the hierarchy */ + new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp); + if (new_he == NULL) { + ret = -1; + break; + } + + root = &new_he->hroot_in; + new_he->depth = depth++; + parent = new_he; + } + + if (new_he) { + new_he->leaf = true; + + if (symbol_conf.use_callchain) { + callchain_cursor_reset(&callchain_cursor); + if (callchain_merge(&callchain_cursor, + new_he->callchain, + he->callchain) < 0) + ret = -1; + } + } + + /* 'he' is no longer used */ + hist_entry__delete(he); + + /* return 0 (or -1) since it already applied filters */ + return ret; +} + +int hists__collapse_insert_entry(struct hists *hists, struct rb_root *root, + struct hist_entry *he) +{ struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; int64_t cmp; + if (symbol_conf.report_hierarchy) + return hists__hierarchy_insert_entry(hists, root, he); + while (*p != NULL) { parent = *p; iter = rb_entry(parent, struct hist_entry, rb_node_in); @@ -998,18 +1313,21 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, cmp = hist_entry__collapse(iter, he); if (!cmp) { + int ret = 0; + he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); if (symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); - callchain_merge(&callchain_cursor, - iter->callchain, - he->callchain); + if (callchain_merge(&callchain_cursor, + iter->callchain, + he->callchain) < 0) + ret = -1; } hist_entry__delete(he); - return false; + return ret; } if (cmp < 0) @@ -1021,10 +1339,10 @@ static bool hists__collapse_insert_entry(struct hists *hists __maybe_unused, rb_link_node(&he->rb_node_in, parent, p); rb_insert_color(&he->rb_node_in, root); - return true; + return 1; } -static struct rb_root *hists__get_rotate_entries_in(struct hists *hists) +struct rb_root *hists__get_rotate_entries_in(struct hists *hists) { struct rb_root *root; @@ -1047,14 +1365,15 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_socket(hists, he); } -void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) +int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; + int ret; if (!sort__need_collapse) - return; + return 0; hists->nr_entries = 0; @@ -1069,7 +1388,11 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) next = rb_next(&n->rb_node_in); rb_erase(&n->rb_node_in, root); - if (hists__collapse_insert_entry(hists, &hists->entries_collapsed, n)) { + ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); + if (ret < 0) + return -1; + + if (ret) { /* * If it wasn't combined with one of the entries already * collapsed, we need to apply the filters that may have @@ -1080,15 +1403,17 @@ void hists__collapse_resort(struct hists *hists, struct ui_progress *prog) if (prog) ui_progress__update(prog, 1); } + return 0; } static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { + struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; int64_t cmp = 0; - perf_hpp__for_each_sort_list(fmt) { - if (perf_hpp__should_skip(fmt)) + hists__for_each_sort_list(hists, fmt) { + if (perf_hpp__should_skip(fmt, a->hists)) continue; cmp = fmt->sort(fmt, a, b); @@ -1128,6 +1453,113 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) hists->stats.total_period += h->stat.period; } +static void hierarchy_recalc_total_periods(struct hists *hists) +{ + struct rb_node *node; + struct hist_entry *he; + + node = rb_first(&hists->entries); + + hists->stats.total_period = 0; + hists->stats.total_non_filtered_period = 0; + + /* + * recalculate total period using top-level entries only + * since lower level entries only see non-filtered entries + * but upper level entries have sum of both entries. + */ + while (node) { + he = rb_entry(node, struct hist_entry, rb_node); + node = rb_next(node); + + hists->stats.total_period += he->stat.period; + if (!he->filtered) + hists->stats.total_non_filtered_period += he->stat.period; + } +} + +static void hierarchy_insert_output_entry(struct rb_root *root, + struct hist_entry *he) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + struct perf_hpp_fmt *fmt; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (hist_entry__sort(he, iter) > 0) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, root); + + /* update column width of dynamic entry */ + perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt)) + fmt->sort(fmt, he, NULL); + } +} + +static void hists__hierarchy_output_resort(struct hists *hists, + struct ui_progress *prog, + struct rb_root *root_in, + struct rb_root *root_out, + u64 min_callchain_hits, + bool use_callchain) +{ + struct rb_node *node; + struct hist_entry *he; + + *root_out = RB_ROOT; + node = rb_first(root_in); + + while (node) { + he = rb_entry(node, struct hist_entry, rb_node_in); + node = rb_next(node); + + hierarchy_insert_output_entry(root_out, he); + + if (prog) + ui_progress__update(prog, 1); + + if (!he->leaf) { + hists__hierarchy_output_resort(hists, prog, + &he->hroot_in, + &he->hroot_out, + min_callchain_hits, + use_callchain); + hists->nr_entries++; + if (!he->filtered) { + hists->nr_non_filtered_entries++; + hists__calc_col_len(hists, he); + } + + continue; + } + + if (!use_callchain) + continue; + + if (callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; + + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (callchain_param.min_percent / 100); + } + + callchain_param.sort(&he->sorted_chain, he->callchain, + min_callchain_hits, &callchain_param); + } +} + static void __hists__insert_output_entry(struct rb_root *entries, struct hist_entry *he, u64 min_callchain_hits, @@ -1136,10 +1568,20 @@ static void __hists__insert_output_entry(struct rb_root *entries, struct rb_node **p = &entries->rb_node; struct rb_node *parent = NULL; struct hist_entry *iter; + struct perf_hpp_fmt *fmt; + + if (use_callchain) { + if (callchain_param.mode == CHAIN_GRAPH_REL) { + u64 total = he->stat.period; - if (use_callchain) + if (symbol_conf.cumulate_callchain) + total = he->stat_acc->period; + + min_callchain_hits = total * (callchain_param.min_percent / 100); + } callchain_param.sort(&he->sorted_chain, he->callchain, min_callchain_hits, &callchain_param); + } while (*p != NULL) { parent = *p; @@ -1153,23 +1595,41 @@ static void __hists__insert_output_entry(struct rb_root *entries, rb_link_node(&he->rb_node, parent, p); rb_insert_color(&he->rb_node, entries); + + perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { + if (perf_hpp__is_dynamic_entry(fmt) && + perf_hpp__defined_dynamic_entry(fmt, he->hists)) + fmt->sort(fmt, he, NULL); /* update column width */ + } } -void hists__output_resort(struct hists *hists, struct ui_progress *prog) +static void output_resort(struct hists *hists, struct ui_progress *prog, + bool use_callchain) { struct rb_root *root; struct rb_node *next; struct hist_entry *n; + u64 callchain_total; u64 min_callchain_hits; - struct perf_evsel *evsel = hists_to_evsel(hists); - bool use_callchain; - if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) - use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; - else - use_callchain = symbol_conf.use_callchain; + callchain_total = hists->callchain_period; + if (symbol_conf.filter_relative) + callchain_total = hists->callchain_non_filtered_period; - min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); + min_callchain_hits = callchain_total * (callchain_param.min_percent / 100); + + hists__reset_stats(hists); + hists__reset_col_len(hists); + + if (symbol_conf.report_hierarchy) { + hists__hierarchy_output_resort(hists, prog, + &hists->entries_collapsed, + &hists->entries, + min_callchain_hits, + use_callchain); + hierarchy_recalc_total_periods(hists); + return; + } if (sort__need_collapse) root = &hists->entries_collapsed; @@ -1179,9 +1639,6 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) next = rb_first(root); hists->entries = RB_ROOT; - hists__reset_stats(hists); - hists__reset_col_len(hists); - while (next) { n = rb_entry(next, struct hist_entry, rb_node_in); next = rb_next(&n->rb_node_in); @@ -1197,15 +1654,136 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) } } +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) +{ + bool use_callchain; + + if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) + use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + else + use_callchain = symbol_conf.use_callchain; + + output_resort(evsel__hists(evsel), prog, use_callchain); +} + +void hists__output_resort(struct hists *hists, struct ui_progress *prog) +{ + output_resort(hists, prog, symbol_conf.use_callchain); +} + +static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) +{ + if (he->leaf || hmd == HMD_FORCE_SIBLING) + return false; + + if (he->unfolded || hmd == HMD_FORCE_CHILD) + return true; + + return false; +} + +struct rb_node *rb_hierarchy_last(struct rb_node *node) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + while (can_goto_child(he, HMD_NORMAL)) { + node = rb_last(&he->hroot_out); + he = rb_entry(node, struct hist_entry, rb_node); + } + return node; +} + +struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + if (can_goto_child(he, hmd)) + node = rb_first(&he->hroot_out); + else + node = rb_next(node); + + while (node == NULL) { + he = he->parent_he; + if (he == NULL) + break; + + node = rb_next(&he->rb_node); + } + return node; +} + +struct rb_node *rb_hierarchy_prev(struct rb_node *node) +{ + struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); + + node = rb_prev(node); + if (node) + return rb_hierarchy_last(node); + + he = he->parent_he; + if (he == NULL) + return NULL; + + return &he->rb_node; +} + +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) +{ + struct rb_node *node; + struct hist_entry *child; + float percent; + + if (he->leaf) + return false; + + node = rb_first(&he->hroot_out); + child = rb_entry(node, struct hist_entry, rb_node); + + while (node && child->filtered) { + node = rb_next(node); + child = rb_entry(node, struct hist_entry, rb_node); + } + + if (node) + percent = hist_entry__get_percent_limit(child); + else + percent = 0; + + return node && percent >= limit; +} + static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h, enum hist_filter filter) { h->filtered &= ~(1 << filter); + + if (symbol_conf.report_hierarchy) { + struct hist_entry *parent = h->parent_he; + + while (parent) { + he_stat__add_stat(&parent->stat, &h->stat); + + parent->filtered &= ~(1 << filter); + + if (parent->filtered) + goto next; + + /* force fold unfiltered entry for simplicity */ + parent->unfolded = false; + parent->has_no_entry = false; + parent->row_offset = 0; + parent->nr_rows = 0; +next: + parent = parent->parent_he; + } + } + if (h->filtered) return; /* force fold unfiltered entry for simplicity */ h->unfolded = false; + h->has_no_entry = false; h->row_offset = 0; h->nr_rows = 0; @@ -1228,28 +1806,6 @@ static bool hists__filter_entry_by_dso(struct hists *hists, return false; } -void hists__filter_by_dso(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (symbol_conf.exclude_other && !h->parent) - continue; - - if (hists__filter_entry_by_dso(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__DSO); - } -} - static bool hists__filter_entry_by_thread(struct hists *hists, struct hist_entry *he) { @@ -1262,25 +1818,6 @@ static bool hists__filter_entry_by_thread(struct hists *hists, return false; } -void hists__filter_by_thread(struct hists *hists) -{ - struct rb_node *nd; - - hists->stats.nr_non_filtered_samples = 0; - - hists__reset_filter_stats(hists); - hists__reset_col_len(hists); - - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { - struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - - if (hists__filter_entry_by_thread(hists, h)) - continue; - - hists__remove_entry_filter(hists, h, HIST_FILTER__THREAD); - } -} - static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he) { @@ -1294,7 +1831,21 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, return false; } -void hists__filter_by_symbol(struct hists *hists) +static bool hists__filter_entry_by_socket(struct hists *hists, + struct hist_entry *he) +{ + if ((hists->socket_filter > -1) && + (he->socket != hists->socket_filter)) { + he->filtered |= (1 << HIST_FILTER__SOCKET); + return true; + } + + return false; +} + +typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); + +static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) { struct rb_node *nd; @@ -1306,42 +1857,155 @@ void hists__filter_by_symbol(struct hists *hists) for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); - if (hists__filter_entry_by_symbol(hists, h)) + if (filter(hists, h)) continue; - hists__remove_entry_filter(hists, h, HIST_FILTER__SYMBOL); + hists__remove_entry_filter(hists, h, type); } } -static bool hists__filter_entry_by_socket(struct hists *hists, - struct hist_entry *he) +static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) { - if ((hists->socket_filter > -1) && - (he->socket != hists->socket_filter)) { - he->filtered |= (1 << HIST_FILTER__SOCKET); - return true; + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + struct rb_root new_root = RB_ROOT; + struct rb_node *nd; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (hist_entry__sort(he, iter) > 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; } - return false; + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, root); + + if (he->leaf || he->filtered) + return; + + nd = rb_first(&he->hroot_out); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + nd = rb_next(nd); + rb_erase(&h->rb_node, &he->hroot_out); + + resort_filtered_entry(&new_root, h); + } + + he->hroot_out = new_root; } -void hists__filter_by_socket(struct hists *hists) +static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) { struct rb_node *nd; + struct rb_root new_root = RB_ROOT; hists->stats.nr_non_filtered_samples = 0; hists__reset_filter_stats(hists); hists__reset_col_len(hists); - for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { + nd = rb_first(&hists->entries); + while (nd) { struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + int ret; - if (hists__filter_entry_by_socket(hists, h)) - continue; + ret = hist_entry__filter(h, type, arg); + + /* + * case 1. non-matching type + * zero out the period, set filter marker and move to child + */ + if (ret < 0) { + memset(&h->stat, 0, sizeof(h->stat)); + h->filtered |= (1 << type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD); + } + /* + * case 2. matched type (filter out) + * set filter marker and move to next + */ + else if (ret == 1) { + h->filtered |= (1 << type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); + } + /* + * case 3. ok (not filtered) + * add period to hists and parents, erase the filter marker + * and move to next sibling + */ + else { + hists__remove_entry_filter(hists, h, type); + + nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING); + } + } + + hierarchy_recalc_total_periods(hists); + + /* + * resort output after applying a new filter since filter in a lower + * hierarchy can change periods in a upper hierarchy. + */ + nd = rb_first(&hists->entries); + while (nd) { + struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); + + nd = rb_next(nd); + rb_erase(&h->rb_node, &hists->entries); - hists__remove_entry_filter(hists, h, HIST_FILTER__SOCKET); + resort_filtered_entry(&new_root, h); } + + hists->entries = new_root; +} + +void hists__filter_by_thread(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__THREAD, + hists->thread_filter); + else + hists__filter_by_type(hists, HIST_FILTER__THREAD, + hists__filter_entry_by_thread); +} + +void hists__filter_by_dso(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__DSO, + hists->dso_filter); + else + hists__filter_by_type(hists, HIST_FILTER__DSO, + hists__filter_entry_by_dso); +} + +void hists__filter_by_symbol(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL, + hists->symbol_filter_str); + else + hists__filter_by_type(hists, HIST_FILTER__SYMBOL, + hists__filter_entry_by_symbol); +} + +void hists__filter_by_socket(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__SOCKET, + &hists->socket_filter); + else + hists__filter_by_type(hists, HIST_FILTER__SOCKET, + hists__filter_entry_by_socket); } void events_stats__inc(struct events_stats *stats, u32 type) @@ -1559,10 +2223,8 @@ int perf_hist_config(const char *var, const char *value) return 0; } -static int hists_evsel__init(struct perf_evsel *evsel) +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) { - struct hists *hists = evsel__hists(evsel); - memset(hists, 0, sizeof(*hists)); hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; hists->entries_in = &hists->entries_in_array[0]; @@ -1570,6 +2232,56 @@ static int hists_evsel__init(struct perf_evsel *evsel) hists->entries = RB_ROOT; pthread_mutex_init(&hists->lock, NULL); hists->socket_filter = -1; + hists->hpp_list = hpp_list; + INIT_LIST_HEAD(&hists->hpp_formats); + return 0; +} + +static void hists__delete_remaining_entries(struct rb_root *root) +{ + struct rb_node *node; + struct hist_entry *he; + + while (!RB_EMPTY_ROOT(root)) { + node = rb_first(root); + rb_erase(node, root); + + he = rb_entry(node, struct hist_entry, rb_node_in); + hist_entry__delete(he); + } +} + +static void hists__delete_all_entries(struct hists *hists) +{ + hists__delete_entries(hists); + hists__delete_remaining_entries(&hists->entries_in_array[0]); + hists__delete_remaining_entries(&hists->entries_in_array[1]); + hists__delete_remaining_entries(&hists->entries_collapsed); +} + +static void hists_evsel__exit(struct perf_evsel *evsel) +{ + struct hists *hists = evsel__hists(evsel); + struct perf_hpp_fmt *fmt, *pos; + struct perf_hpp_list_node *node, *tmp; + + hists__delete_all_entries(hists); + + list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { + perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { + list_del(&fmt->list); + free(fmt); + } + list_del(&node->list); + free(node); + } +} + +static int hists_evsel__init(struct perf_evsel *evsel) +{ + struct hists *hists = evsel__hists(evsel); + + __hists__init(hists, &perf_hpp_list); return 0; } @@ -1581,9 +2293,16 @@ static int hists_evsel__init(struct perf_evsel *evsel) int hists__init(void) { int err = perf_evsel__object_config(sizeof(struct hists_evsel), - hists_evsel__init, NULL); + hists_evsel__init, + hists_evsel__exit); if (err) fputs("FATAL ERROR: Couldn't setup hists class\n", stderr); return err; } + +void perf_hpp_list__init(struct perf_hpp_list *list) +{ + INIT_LIST_HEAD(&list->fields); + INIT_LIST_HEAD(&list->sorts); +} diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index a48a2078d288..ead18c82294f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -52,6 +52,7 @@ enum hist_column { HISTC_MEM_IADDR_SYMBOL, HISTC_TRANSACTION, HISTC_CYCLES, + HISTC_TRACE, HISTC_NR_COLS, /* Last entry */ }; @@ -65,6 +66,8 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; + u64 callchain_period; + u64 callchain_non_filtered_period; struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; @@ -74,6 +77,9 @@ struct hists { u64 event_stream; u16 col_len[HISTC_NR_COLS]; int socket_filter; + struct perf_hpp_list *hpp_list; + struct list_head hpp_formats; + int nr_hpp_node; }; struct hist_entry_iter; @@ -114,21 +120,27 @@ struct hist_entry *__hists__add_entry(struct hists *hists, struct addr_location *al, struct symbol *parent, struct branch_info *bi, - struct mem_info *mi, u64 period, - u64 weight, u64 transaction, + struct mem_info *mi, + struct perf_sample *sample, bool sample_self); int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int max_stack_depth, void *arg); +struct perf_hpp; +struct perf_hpp_fmt; + int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); int hist_entry__transaction_len(void); int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size, struct hists *hists); +int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, + struct perf_hpp_fmt *fmt, int printed); void hist_entry__delete(struct hist_entry *he); +void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); void hists__output_resort(struct hists *hists, struct ui_progress *prog); -void hists__collapse_resort(struct hists *hists, struct ui_progress *prog); +int hists__collapse_resort(struct hists *hists, struct ui_progress *prog); void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel); void hists__delete_entries(struct hists *hists); @@ -184,6 +196,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) } int hists__init(void); +int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); + +struct rb_root *hists__get_rotate_entries_in(struct hists *hists); +int hists__collapse_insert_entry(struct hists *hists, + struct rb_root *root, struct hist_entry *he); struct perf_hpp { char *buf; @@ -208,28 +225,64 @@ struct perf_hpp_fmt { struct hist_entry *a, struct hist_entry *b); int64_t (*sort)(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b); + bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); + void (*free)(struct perf_hpp_fmt *fmt); struct list_head list; struct list_head sort_list; bool elide; int len; int user_len; + int idx; + int level; +}; + +struct perf_hpp_list { + struct list_head fields; + struct list_head sorts; +}; + +extern struct perf_hpp_list perf_hpp_list; + +struct perf_hpp_list_node { + struct list_head list; + struct perf_hpp_list hpp; + int level; + bool skip; }; -extern struct list_head perf_hpp__list; -extern struct list_head perf_hpp__sort_list; +void perf_hpp_list__column_register(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); +void perf_hpp_list__register_sort_field(struct perf_hpp_list *list, + struct perf_hpp_fmt *format); + +static inline void perf_hpp__column_register(struct perf_hpp_fmt *format) +{ + perf_hpp_list__column_register(&perf_hpp_list, format); +} + +static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) +{ + perf_hpp_list__register_sort_field(&perf_hpp_list, format); +} + +#define perf_hpp_list__for_each_format(_list, format) \ + list_for_each_entry(format, &(_list)->fields, list) -#define perf_hpp__for_each_format(format) \ - list_for_each_entry(format, &perf_hpp__list, list) +#define perf_hpp_list__for_each_format_safe(_list, format, tmp) \ + list_for_each_entry_safe(format, tmp, &(_list)->fields, list) -#define perf_hpp__for_each_format_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp__list, list) +#define perf_hpp_list__for_each_sort_list(_list, format) \ + list_for_each_entry(format, &(_list)->sorts, sort_list) -#define perf_hpp__for_each_sort_list(format) \ - list_for_each_entry(format, &perf_hpp__sort_list, sort_list) +#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp) \ + list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) -#define perf_hpp__for_each_sort_list_safe(format, tmp) \ - list_for_each_entry_safe(format, tmp, &perf_hpp__sort_list, sort_list) +#define hists__for_each_format(hists, format) \ + perf_hpp_list__for_each_format((hists)->hpp_list, fmt) + +#define hists__for_each_sort_list(hists, format) \ + perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt) extern struct perf_hpp_fmt perf_hpp__format[]; @@ -248,23 +301,41 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_register(struct perf_hpp_fmt *format); void perf_hpp__column_unregister(struct perf_hpp_fmt *format); -void perf_hpp__column_enable(unsigned col); -void perf_hpp__column_disable(unsigned col); void perf_hpp__cancel_cumulate(void); +void perf_hpp__setup_output_field(struct perf_hpp_list *list); +void perf_hpp__reset_output_field(struct perf_hpp_list *list); +void perf_hpp__append_sort_keys(struct perf_hpp_list *list); +int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, + struct perf_evlist *evlist); -void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); -void perf_hpp__setup_output_field(void); -void perf_hpp__reset_output_field(void); -void perf_hpp__append_sort_keys(void); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); - -static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format) +bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format); +bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists); +bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); +bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); + +struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt); + +int hist_entry__filter(struct hist_entry *he, int type, const void *arg); + +static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format, + struct hists *hists) { - return format->elide; + if (format->elide) + return true; + + if (perf_hpp__is_dynamic_entry(format) && + !perf_hpp__defined_dynamic_entry(format, hists)) + return true; + + return false; } void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); @@ -356,6 +427,7 @@ static inline int script_browse(const char *script_opt __maybe_unused) #endif unsigned int hists__sort_list_width(struct hists *hists); +unsigned int hists__overhead_width(struct hists *hists); void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, struct perf_sample *sample, bool nonany_branch_mode); @@ -365,4 +437,26 @@ int parse_filter_percentage(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused); int perf_hist_config(const char *var, const char *value); +void perf_hpp_list__init(struct perf_hpp_list *list); + +enum hierarchy_move_dir { + HMD_NORMAL, + HMD_FORCE_SIBLING, + HMD_FORCE_CHILD, +}; + +struct rb_node *rb_hierarchy_last(struct rb_node *node); +struct rb_node *__rb_hierarchy_next(struct rb_node *node, + enum hierarchy_move_dir hmd); +struct rb_node *rb_hierarchy_prev(struct rb_node *node); + +static inline struct rb_node *rb_hierarchy_next(struct rb_node *node) +{ + return __rb_hierarchy_next(node, HMD_NORMAL); +} + +#define HIERARCHY_INDENT 3 + +bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit); + #endif /* __PERF_HIST_H */ diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h deleted file mode 100644 index 40bd21488032..000000000000 --- a/tools/perf/util/include/linux/bitmap.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef _PERF_BITOPS_H -#define _PERF_BITOPS_H - -#include <string.h> -#include <linux/bitops.h> - -#define DECLARE_BITMAP(name,bits) \ - unsigned long name[BITS_TO_LONGS(bits)] - -int __bitmap_weight(const unsigned long *bitmap, int bits); -void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, - const unsigned long *bitmap2, int bits); - -#define BITMAP_LAST_WORD_MASK(nbits) \ -( \ - ((nbits) % BITS_PER_LONG) ? \ - (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ -) - -#define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) - -static inline void bitmap_zero(unsigned long *dst, int nbits) -{ - if (small_const_nbits(nbits)) - *dst = 0UL; - else { - int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - memset(dst, 0, len); - } -} - -static inline int bitmap_weight(const unsigned long *src, int nbits) -{ - if (small_const_nbits(nbits)) - return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); - return __bitmap_weight(src, nbits); -} - -static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, - const unsigned long *src2, int nbits) -{ - if (small_const_nbits(nbits)) - *dst = *src1 | *src2; - else - __bitmap_or(dst, src1, src2, nbits); -} - -/** - * test_and_set_bit - Set a bit and return its old value - * @nr: Bit to set - * @addr: Address to count from - */ -static inline int test_and_set_bit(int nr, unsigned long *addr) -{ - unsigned long mask = BIT_MASK(nr); - unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - unsigned long old; - - old = *p; - *p = old | mask; - - return (old & mask) != 0; -} - -#endif /* _PERF_BITOPS_H */ diff --git a/tools/perf/util/include/linux/string.h b/tools/perf/util/include/linux/string.h deleted file mode 100644 index 6f19c548ecc0..000000000000 --- a/tools/perf/util/include/linux/string.h +++ /dev/null @@ -1,3 +0,0 @@ -#include <string.h> - -void *memdup(const void *src, size_t len); diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 97f963a3dcb9..05d815851be1 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1744,7 +1744,7 @@ static void intel_pt_free(struct perf_session *session) auxtrace_heap__free(&pt->heap); intel_pt_free_events(session); session->auxtrace = NULL; - thread__delete(pt->unknown_thread); + thread__put(pt->unknown_thread); free(pt); } @@ -2068,6 +2068,15 @@ int intel_pt_process_auxtrace_info(union perf_event *event, err = -ENOMEM; goto err_free_queues; } + + /* + * Since this thread will not be kept in any rbtree not in a + * list, initialize its list node so that at thread__put() the + * current thread lifetime assuption is kept and we don't segfault + * at list_del_init(). + */ + INIT_LIST_HEAD(&pt->unknown_thread->node); + err = thread__set_comm(pt->unknown_thread, "unknown", 0); if (err) goto err_delete_thread; @@ -2153,7 +2162,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event, return 0; err_delete_thread: - thread__delete(pt->unknown_thread); + thread__zput(pt->unknown_thread); err_free_queues: intel_pt_log_disable(); auxtrace_queues__free(&pt->queues); diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h new file mode 100644 index 000000000000..a1e99da0715a --- /dev/null +++ b/tools/perf/util/jit.h @@ -0,0 +1,15 @@ +#ifndef __JIT_H__ +#define __JIT_H__ + +#include <data.h> + +extern int jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes); + +extern int jit_inject_record(const char *filename); + +#endif /* __JIT_H__ */ diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c new file mode 100644 index 000000000000..cd272cc21e05 --- /dev/null +++ b/tools/perf/util/jitdump.c @@ -0,0 +1,697 @@ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <inttypes.h> +#include <byteswap.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include "util.h" +#include "event.h" +#include "debug.h" +#include "evlist.h" +#include "symbol.h" +#include "strlist.h" +#include <elf.h> + +#include "session.h" +#include "jit.h" +#include "jitdump.h" +#include "genelf.h" +#include "../builtin.h" + +struct jit_buf_desc { + struct perf_data_file *output; + struct perf_session *session; + struct machine *machine; + union jr_entry *entry; + void *buf; + uint64_t sample_type; + size_t bufsize; + FILE *in; + bool needs_bswap; /* handles cross-endianess */ + void *debug_data; + size_t nr_debug_entries; + uint32_t code_load_count; + u64 bytes_written; + struct rb_root code_root; + char dir[PATH_MAX]; +}; + +struct debug_line_info { + unsigned long vma; + unsigned int lineno; + /* The filename format is unspecified, absolute path, relative etc. */ + char const filename[0]; +}; + +struct jit_tool { + struct perf_tool tool; + struct perf_data_file output; + struct perf_data_file input; + u64 bytes_written; +}; + +#define hmax(a, b) ((a) > (b) ? (a) : (b)) +#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool)) + +static int +jit_emit_elf(char *filename, + const char *sym, + uint64_t code_addr, + const void *code, + int csize, + void *debug, + int nr_debug_entries) +{ + int ret, fd; + + if (verbose > 0) + fprintf(stderr, "write ELF image %s\n", filename); + + fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644); + if (fd == -1) { + pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno)); + return -1; + } + + ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries); + + close(fd); + + if (ret) + unlink(filename); + + return ret; +} + +static void +jit_close(struct jit_buf_desc *jd) +{ + if (!(jd && jd->in)) + return; + funlockfile(jd->in); + fclose(jd->in); + jd->in = NULL; +} + +static int +jit_validate_events(struct perf_session *session) +{ + struct perf_evsel *evsel; + + /* + * check that all events use CLOCK_MONOTONIC + */ + evlist__for_each(session->evlist, evsel) { + if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC) + return -1; + } + return 0; +} + +static int +jit_open(struct jit_buf_desc *jd, const char *name) +{ + struct jitheader header; + struct jr_prefix *prefix; + ssize_t bs, bsz = 0; + void *n, *buf = NULL; + int ret, retval = -1; + + jd->in = fopen(name, "r"); + if (!jd->in) + return -1; + + bsz = hmax(sizeof(header), sizeof(*prefix)); + + buf = malloc(bsz); + if (!buf) + goto error; + + /* + * protect from writer modifying the file while we are reading it + */ + flockfile(jd->in); + + ret = fread(buf, sizeof(header), 1, jd->in); + if (ret != 1) + goto error; + + memcpy(&header, buf, sizeof(header)); + + if (header.magic != JITHEADER_MAGIC) { + if (header.magic != JITHEADER_MAGIC_SW) + goto error; + jd->needs_bswap = true; + } + + if (jd->needs_bswap) { + header.version = bswap_32(header.version); + header.total_size = bswap_32(header.total_size); + header.pid = bswap_32(header.pid); + header.elf_mach = bswap_32(header.elf_mach); + header.timestamp = bswap_64(header.timestamp); + header.flags = bswap_64(header.flags); + } + + if (verbose > 2) + pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\n", + header.version, + header.total_size, + (unsigned long long)header.timestamp, + header.pid, + header.elf_mach); + + if (header.flags & JITDUMP_FLAGS_RESERVED) { + pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n", + (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED); + goto error; + } + + /* + * validate event is using the correct clockid + */ + if (jit_validate_events(jd->session)) { + pr_err("error, jitted code must be sampled with perf record -k 1\n"); + goto error; + } + + bs = header.total_size - sizeof(header); + + if (bs > bsz) { + n = realloc(buf, bs); + if (!n) + goto error; + bsz = bs; + buf = n; + /* read extra we do not know about */ + ret = fread(buf, bs - bsz, 1, jd->in); + if (ret != 1) + goto error; + } + /* + * keep dirname for generating files and mmap records + */ + strcpy(jd->dir, name); + dirname(jd->dir); + + return 0; +error: + funlockfile(jd->in); + fclose(jd->in); + return retval; +} + +static union jr_entry * +jit_get_next_entry(struct jit_buf_desc *jd) +{ + struct jr_prefix *prefix; + union jr_entry *jr; + void *addr; + size_t bs, size; + int id, ret; + + if (!(jd && jd->in)) + return NULL; + + if (jd->buf == NULL) { + size_t sz = getpagesize(); + if (sz < sizeof(*prefix)) + sz = sizeof(*prefix); + + jd->buf = malloc(sz); + if (jd->buf == NULL) + return NULL; + + jd->bufsize = sz; + } + + prefix = jd->buf; + + /* + * file is still locked at this point + */ + ret = fread(prefix, sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + if (jd->needs_bswap) { + prefix->id = bswap_32(prefix->id); + prefix->total_size = bswap_32(prefix->total_size); + prefix->timestamp = bswap_64(prefix->timestamp); + } + id = prefix->id; + size = prefix->total_size; + + bs = (size_t)size; + if (bs < sizeof(*prefix)) + return NULL; + + if (id >= JIT_CODE_MAX) { + pr_warning("next_entry: unknown prefix %d, skipping\n", id); + return NULL; + } + if (bs > jd->bufsize) { + void *n; + n = realloc(jd->buf, bs); + if (!n) + return NULL; + jd->buf = n; + jd->bufsize = bs; + } + + addr = ((void *)jd->buf) + sizeof(*prefix); + + ret = fread(addr, bs - sizeof(*prefix), 1, jd->in); + if (ret != 1) + return NULL; + + jr = (union jr_entry *)jd->buf; + + switch(id) { + case JIT_CODE_DEBUG_INFO: + if (jd->needs_bswap) { + uint64_t n; + jr->info.code_addr = bswap_64(jr->info.code_addr); + jr->info.nr_entry = bswap_64(jr->info.nr_entry); + for (n = 0 ; n < jr->info.nr_entry; n++) { + jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr); + jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno); + jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim); + } + } + break; + case JIT_CODE_CLOSE: + break; + case JIT_CODE_LOAD: + if (jd->needs_bswap) { + jr->load.pid = bswap_32(jr->load.pid); + jr->load.tid = bswap_32(jr->load.tid); + jr->load.vma = bswap_64(jr->load.vma); + jr->load.code_addr = bswap_64(jr->load.code_addr); + jr->load.code_size = bswap_64(jr->load.code_size); + jr->load.code_index= bswap_64(jr->load.code_index); + } + jd->code_load_count++; + break; + case JIT_CODE_MOVE: + if (jd->needs_bswap) { + jr->move.pid = bswap_32(jr->move.pid); + jr->move.tid = bswap_32(jr->move.tid); + jr->move.vma = bswap_64(jr->move.vma); + jr->move.old_code_addr = bswap_64(jr->move.old_code_addr); + jr->move.new_code_addr = bswap_64(jr->move.new_code_addr); + jr->move.code_size = bswap_64(jr->move.code_size); + jr->move.code_index = bswap_64(jr->move.code_index); + } + break; + case JIT_CODE_MAX: + default: + return NULL; + } + return jr; +} + +static int +jit_inject_event(struct jit_buf_desc *jd, union perf_event *event) +{ + ssize_t size; + + size = perf_data_file__write(jd->output, event, event->header.size); + if (size < 0) + return -1; + + jd->bytes_written += size; + return 0; +} + +static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + uint64_t code, addr; + uintptr_t uaddr; + char *filename; + struct stat st; + size_t size; + u16 idr_size; + const char *sym; + uint32_t count; + int ret, csize; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->load.pid; + tid = jr->load.tid; + csize = jr->load.code_size; + addr = jr->load.code_addr; + sym = (void *)((unsigned long)jr + sizeof(jr->load)); + code = (unsigned long)jr + jr->load.p.total_size - csize; + count = jr->load.code_index; + idr_size = jd->machine->id_hdr_size; + + event = calloc(1, sizeof(*event) + idr_size); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%u.so", + jd->dir, + pid, + count); + + size++; /* for \0 */ + + size = PERF_ALIGN(size, sizeof(u64)); + uaddr = (uintptr_t)code; + ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries); + + if (jd->debug_data && jd->nr_debug_entries) { + free(jd->debug_data); + jd->debug_data = NULL; + jd->nr_debug_entries = 0; + } + + if (ret) { + free(event); + return -1; + } + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + + event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = addr; + event->mmap2.len = csize; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + /* + * mark dso as use to generate buildid in the header + */ + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr) +{ + struct perf_sample sample; + union perf_event *event; + struct perf_tool *tool = jd->session->tool; + char *filename; + size_t size; + struct stat st; + u16 idr_size; + int ret; + pid_t pid, tid; + struct { + u32 pid, tid; + u64 time; + } *id; + + pid = jr->move.pid; + tid = jr->move.tid; + idr_size = jd->machine->id_hdr_size; + + /* + * +16 to account for sample_id_all (hack) + */ + event = calloc(1, sizeof(*event) + 16); + if (!event) + return -1; + + filename = event->mmap2.filename; + size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%"PRIu64, + jd->dir, + pid, + jr->move.code_index); + + size++; /* for \0 */ + + if (stat(filename, &st)) + memset(&st, 0, sizeof(stat)); + + size = PERF_ALIGN(size, sizeof(u64)); + + event->mmap2.header.type = PERF_RECORD_MMAP2; + event->mmap2.header.misc = PERF_RECORD_MISC_USER; + event->mmap2.header.size = (sizeof(event->mmap2) - + (sizeof(event->mmap2.filename) - size) + idr_size); + event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET; + event->mmap2.start = jr->move.new_code_addr; + event->mmap2.len = jr->move.code_size; + event->mmap2.pid = pid; + event->mmap2.tid = tid; + event->mmap2.ino = st.st_ino; + event->mmap2.maj = major(st.st_dev); + event->mmap2.min = minor(st.st_dev); + event->mmap2.prot = st.st_mode; + event->mmap2.flags = MAP_SHARED; + event->mmap2.ino_generation = 1; + + id = (void *)((unsigned long)event + event->mmap.header.size - idr_size); + if (jd->sample_type & PERF_SAMPLE_TID) { + id->pid = pid; + id->tid = tid; + } + if (jd->sample_type & PERF_SAMPLE_TIME) + id->time = jr->load.p.timestamp; + + /* + * create pseudo sample to induce dso hit increment + * use first address as sample address + */ + memset(&sample, 0, sizeof(sample)); + sample.pid = pid; + sample.tid = tid; + sample.time = id->time; + sample.ip = jr->move.new_code_addr; + + ret = perf_event__process_mmap2(tool, event, &sample, jd->machine); + if (ret) + return ret; + + ret = jit_inject_event(jd, event); + if (!ret) + build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine); + + return ret; +} + +static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr) +{ + void *data; + size_t sz; + + if (!(jd && jr)) + return -1; + + sz = jr->prefix.total_size - sizeof(jr->info); + data = malloc(sz); + if (!data) + return -1; + + memcpy(data, &jr->info.entries, sz); + + jd->debug_data = data; + + /* + * we must use nr_entry instead of size here because + * we cannot distinguish actual entry from padding otherwise + */ + jd->nr_debug_entries = jr->info.nr_entry; + + return 0; +} + +static int +jit_process_dump(struct jit_buf_desc *jd) +{ + union jr_entry *jr; + int ret; + + while ((jr = jit_get_next_entry(jd))) { + switch(jr->prefix.id) { + case JIT_CODE_LOAD: + ret = jit_repipe_code_load(jd, jr); + break; + case JIT_CODE_MOVE: + ret = jit_repipe_code_move(jd, jr); + break; + case JIT_CODE_DEBUG_INFO: + ret = jit_repipe_debug_info(jd, jr); + break; + default: + ret = 0; + continue; + } + } + return ret; +} + +static int +jit_inject(struct jit_buf_desc *jd, char *path) +{ + int ret; + + if (verbose > 0) + fprintf(stderr, "injecting: %s\n", path); + + ret = jit_open(jd, path); + if (ret) + return -1; + + ret = jit_process_dump(jd); + + jit_close(jd); + + if (verbose > 0) + fprintf(stderr, "injected: %s (%d)\n", path, ret); + + return 0; +} + +/* + * File must be with pattern .../jit-XXXX.dump + * where XXXX is the PID of the process which did the mmap() + * as captured in the RECORD_MMAP record + */ +static int +jit_detect(char *mmap_name, pid_t pid) + { + char *p; + char *end = NULL; + pid_t pid2; + + if (verbose > 2) + fprintf(stderr, "jit marker trying : %s\n", mmap_name); + /* + * get file name + */ + p = strrchr(mmap_name, '/'); + if (!p) + return -1; + + /* + * match prefix + */ + if (strncmp(p, "/jit-", 5)) + return -1; + + /* + * skip prefix + */ + p += 5; + + /* + * must be followed by a pid + */ + if (!isdigit(*p)) + return -1; + + pid2 = (int)strtol(p, &end, 10); + if (!end) + return -1; + + /* + * pid does not match mmap pid + * pid==0 in system-wide mode (synthesized) + */ + if (pid && pid2 != pid) + return -1; + /* + * validate suffix + */ + if (strcmp(end, ".dump")) + return -1; + + if (verbose > 0) + fprintf(stderr, "jit marker found: %s\n", mmap_name); + + return 0; +} + +int +jit_process(struct perf_session *session, + struct perf_data_file *output, + struct machine *machine, + char *filename, + pid_t pid, + u64 *nbytes) +{ + struct perf_evsel *first; + struct jit_buf_desc jd; + int ret; + + /* + * first, detect marker mmap (i.e., the jitdump mmap) + */ + if (jit_detect(filename, pid)) + return 0; + + memset(&jd, 0, sizeof(jd)); + + jd.session = session; + jd.output = output; + jd.machine = machine; + + /* + * track sample_type to compute id_all layout + * perf sets the same sample type to all events as of now + */ + first = perf_evlist__first(session->evlist); + jd.sample_type = first->attr.sample_type; + + *nbytes = 0; + + ret = jit_inject(&jd, filename); + if (!ret) { + *nbytes = jd.bytes_written; + ret = 1; + } + + return ret; +} diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h new file mode 100644 index 000000000000..b66c1f503d9e --- /dev/null +++ b/tools/perf/util/jitdump.h @@ -0,0 +1,124 @@ +/* + * jitdump.h: jitted code info encapsulation file format + * + * Adapted from OProfile GPLv2 support jidump.h: + * Copyright 2007 OProfile authors + * Jens Wilke + * Daniel Hansel + * Copyright IBM Corporation 2007 + */ +#ifndef JITDUMP_H +#define JITDUMP_H + +#include <sys/time.h> +#include <time.h> +#include <stdint.h> + +/* JiTD */ +#define JITHEADER_MAGIC 0x4A695444 +#define JITHEADER_MAGIC_SW 0x4454694A + +#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7) + +#define JITHEADER_VERSION 1 + +enum jitdump_flags_bits { + JITDUMP_FLAGS_MAX_BIT, +}; + +#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \ + (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0) + +struct jitheader { + uint32_t magic; /* characters "jItD" */ + uint32_t version; /* header version */ + uint32_t total_size; /* total size of header */ + uint32_t elf_mach; /* elf mach target */ + uint32_t pad1; /* reserved */ + uint32_t pid; /* JIT process id */ + uint64_t timestamp; /* timestamp */ + uint64_t flags; /* flags */ +}; + +enum jit_record_type { + JIT_CODE_LOAD = 0, + JIT_CODE_MOVE = 1, + JIT_CODE_DEBUG_INFO = 2, + JIT_CODE_CLOSE = 3, + + JIT_CODE_MAX, +}; + +/* record prefix (mandatory in each record) */ +struct jr_prefix { + uint32_t id; + uint32_t total_size; + uint64_t timestamp; +}; + +struct jr_code_load { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct jr_code_close { + struct jr_prefix p; +}; + +struct jr_code_move { + struct jr_prefix p; + + uint32_t pid; + uint32_t tid; + uint64_t vma; + uint64_t old_code_addr; + uint64_t new_code_addr; + uint64_t code_size; + uint64_t code_index; +}; + +struct debug_entry { + uint64_t addr; + int lineno; /* source line number starting at 1 */ + int discrim; /* column discriminator, 0 is default */ + const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */ +}; + +struct jr_code_debug_info { + struct jr_prefix p; + + uint64_t code_addr; + uint64_t nr_entry; + struct debug_entry entries[0]; +}; + +union jr_entry { + struct jr_code_debug_info info; + struct jr_code_close close; + struct jr_code_load load; + struct jr_code_move move; + struct jr_prefix prefix; +}; + +static inline struct debug_entry * +debug_entry_next(struct debug_entry *ent) +{ + void *a = ent + 1; + size_t l = strlen(ent->name) + 1; + return a + l; +} + +static inline char * +debug_entry_file(struct debug_entry *ent) +{ + void *a = ent + 1; + return a; +} + +#endif /* !JITDUMP_H */ diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index ae825d4ec110..d01e73592f6e 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -122,6 +122,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm, bool kvm_exit_event(struct perf_evsel *evsel); bool kvm_entry_event(struct perf_evsel *evsel); +int setup_kvm_events_tp(struct perf_kvm_stat *kvm); #define define_exit_reasons_table(name, symbols) \ static struct exit_reasons_table name[] = { \ @@ -133,8 +134,13 @@ bool kvm_entry_event(struct perf_evsel *evsel); */ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid); -extern const char * const kvm_events_tp[]; +extern const char *kvm_events_tp[]; extern struct kvm_reg_events_ops kvm_reg_events_ops[]; extern const char * const kvm_skip_events[]; +extern const char *vcpu_id_str; +extern const int decode_str_len; +extern const char *kvm_exit_reason; +extern const char *kvm_entry_trace; +extern const char *kvm_exit_trace; #endif /* __PERF_KVM_STAT_H */ diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 4f6a4780bd5f..00724d496d38 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -4,17 +4,18 @@ */ #include <stdio.h> -#include <sys/utsname.h> #include "util.h" #include "debug.h" #include "llvm-utils.h" #include "cache.h" #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ - "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS " \ - "$KERNEL_INC_OPTIONS -Wno-unused-value " \ - "-Wno-pointer-sign -working-directory " \ - "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -" + "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ + "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ + "$CLANG_OPTIONS $KERNEL_INC_OPTIONS " \ + "-Wno-unused-value -Wno-pointer-sign " \ + "-working-directory $WORKING_DIR " \ + "-c \"$CLANG_SOURCE\" -target bpf -O2 -o -" struct llvm_param llvm_param = { .clang_path = "clang", @@ -214,18 +215,19 @@ static int detect_kbuild_dir(char **kbuild_dir) const char *suffix_dir = ""; char *autoconf_path; - struct utsname utsname; int err; if (!test_dir) { - err = uname(&utsname); - if (err) { - pr_warning("uname failed: %s\n", strerror(errno)); + /* _UTSNAME_LENGTH is 65 */ + char release[128]; + + err = fetch_kernel_version(NULL, release, + sizeof(release)); + if (err) return -EINVAL; - } - test_dir = utsname.release; + test_dir = release; prefix_dir = "/lib/modules/"; suffix_dir = "/build"; } @@ -326,13 +328,15 @@ get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz) { - int err; - char clang_path[PATH_MAX]; + size_t obj_buf_sz; + void *obj_buf = NULL; + int err, nr_cpus_avail; + unsigned int kernel_version; + char linux_version_code_str[64]; const char *clang_opt = llvm_param.clang_opt; - const char *template = llvm_param.clang_bpf_cmd_template; + char clang_path[PATH_MAX], nr_cpus_avail_str[64]; char *kbuild_dir = NULL, *kbuild_include_opts = NULL; - void *obj_buf = NULL; - size_t obj_buf_sz; + const char *template = llvm_param.clang_bpf_cmd_template; if (!template) template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; @@ -354,6 +358,24 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, */ get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); + nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF); + if (nr_cpus_avail <= 0) { + pr_err( +"WARNING:\tunable to get available CPUs in this system: %s\n" +" \tUse 128 instead.\n", strerror(errno)); + nr_cpus_avail = 128; + } + snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d", + nr_cpus_avail); + + if (fetch_kernel_version(&kernel_version, NULL, 0)) + kernel_version = 0; + + snprintf(linux_version_code_str, sizeof(linux_version_code_str), + "0x%x", kernel_version); + + force_set_env("NR_CPUS", nr_cpus_avail_str); + force_set_env("LINUX_VERSION_CODE", linux_version_code_str); force_set_env("CLANG_EXEC", clang_path); force_set_env("CLANG_OPTIONS", clang_opt); force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5ef90be2a249..ad79297c76c8 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -25,6 +25,7 @@ static void dsos__init(struct dsos *dsos) int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { + memset(machine, 0, sizeof(*machine)); map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); dsos__init(&machine->dsos); @@ -44,6 +45,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->comm_exec = false; machine->kernel_start = 0; + memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps)); + machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) return -ENOMEM; @@ -91,6 +94,7 @@ static void dsos__purge(struct dsos *dsos) list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); + pos->root = NULL; list_del_init(&pos->node); dso__put(pos); } @@ -121,6 +125,7 @@ void machine__delete_threads(struct machine *machine) void machine__exit(struct machine *machine) { + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); machine__exit_vdso(machine); @@ -347,13 +352,18 @@ static void machine__update_thread_pid(struct machine *machine, } th->mg = map_groups__get(leader->mg); - +out_put: + thread__put(leader); return; - out_err: pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid); + goto out_put; } +/* + * Caller must eventually drop thread->refcnt returned with a successfull + * lookup/new thread inserted. + */ static struct thread *____machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid, bool create) @@ -371,7 +381,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, if (th != NULL) { if (th->tid == tid) { machine__update_thread_pid(machine, th, pid); - return th; + return thread__get(th); } machine->last_match = NULL; @@ -384,7 +394,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, if (th->tid == tid) { machine->last_match = th; machine__update_thread_pid(machine, th, pid); - return th; + return thread__get(th); } if (tid < th->tid) @@ -412,7 +422,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, if (thread__init_map_groups(th, machine)) { rb_erase_init(&th->rb_node, &machine->threads); RB_CLEAR_NODE(&th->rb_node); - thread__delete(th); + thread__put(th); return NULL; } /* @@ -436,7 +446,7 @@ struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, struct thread *th; pthread_rwlock_wrlock(&machine->threads_lock); - th = thread__get(__machine__findnew_thread(machine, pid, tid)); + th = __machine__findnew_thread(machine, pid, tid); pthread_rwlock_unlock(&machine->threads_lock); return th; } @@ -446,7 +456,7 @@ struct thread *machine__find_thread(struct machine *machine, pid_t pid, { struct thread *th; pthread_rwlock_rdlock(&machine->threads_lock); - th = thread__get(____machine__findnew_thread(machine, pid, tid, false)); + th = ____machine__findnew_thread(machine, pid, tid, false); pthread_rwlock_unlock(&machine->threads_lock); return th; } @@ -559,11 +569,29 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, return 0; } +static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename) +{ + const char *dup_filename; + + if (!filename || !dso || !dso->long_name) + return; + if (dso->long_name[0] != '[') + return; + if (!strchr(filename, '/')) + return; + + dup_filename = strdup(filename); + if (!dup_filename) + return; + + dso__set_long_name(dso, dup_filename, true); +} + struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename) { struct map *map = NULL; - struct dso *dso; + struct dso *dso = NULL; struct kmod_path m; if (kmod_path__parse_name(&m, filename)) @@ -571,8 +599,15 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, m.name); - if (map) + if (map) { + /* + * If the map's dso is an offline module, give dso__load() + * a chance to find the file path of that module by fixing + * long_name. + */ + dso__adjust_kmod_long_name(map->dso, filename); goto out; + } dso = machine__findnew_module_dso(machine, &m, filename); if (dso == NULL) @@ -584,7 +619,11 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, map_groups__insert(&machine->kmaps, map); + /* Put the map here because map_groups__insert alread got it */ + map__put(map); out: + /* put the dso here, corresponding to machine__findnew_module_dso */ + dso__put(dso); free(m.name); return map; } @@ -739,6 +778,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) enum map_type type; u64 start = machine__get_running_kernel_start(machine, NULL); + /* In case of renewal the kernel map, destroy previous one */ + machine__destroy_kernel_maps(machine); + for (type = 0; type < MAP__NR_TYPES; ++type) { struct kmap *kmap; struct map *map; @@ -787,6 +829,7 @@ void machine__destroy_kernel_maps(struct machine *machine) kmap->ref_reloc_sym = NULL; } + map__put(machine->vmlinux_maps[type]); machine->vmlinux_maps[type] = NULL; } } @@ -1083,11 +1126,14 @@ int machine__create_kernel_maps(struct machine *machine) struct dso *kernel = machine__get_kernel(machine); const char *name; u64 addr = machine__get_running_kernel_start(machine, &name); - if (!addr) + int ret; + + if (!addr || kernel == NULL) return -1; - if (kernel == NULL || - __machine__create_kernel_maps(machine, kernel) < 0) + ret = __machine__create_kernel_maps(machine, kernel); + dso__put(kernel); + if (ret < 0) return -1; if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { @@ -1608,6 +1654,8 @@ static int add_callchain_ip(struct thread *thread, } } + if (symbol_conf.hide_unresolved && al.sym == NULL) + return 0; return callchain_cursor_append(&callchain_cursor, al.addr, al.map, al.sym); } @@ -1862,6 +1910,9 @@ check_calls: static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; + + if (symbol_conf.hide_unresolved && entry->sym == NULL) + return 0; return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym); } diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 2c2b443df5ba..1a3e45baf97f 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -180,6 +180,16 @@ struct symbol *machine__find_kernel_symbol(struct machine *machine, } static inline +struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, + enum map_type type, const char *name, + struct map **mapp, + symbol_filter_t filter) +{ + return map_groups__find_symbol_by_name(&machine->kmaps, type, name, + mapp, filter); +} + +static inline struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, struct map **mapp, symbol_filter_t filter) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4e38c396a897..171b6d10a04b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -26,8 +26,8 @@ const char *map_type__name[MAP__NR_TYPES] = { static inline int is_anon_memory(const char *filename) { return !strcmp(filename, "//anon") || - !strcmp(filename, "/dev/zero (deleted)") || - !strcmp(filename, "/anon_hugepage (deleted)"); + !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) || + !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1); } static inline int is_no_dso_memory(const char *filename) @@ -644,6 +644,12 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) return printed; } +static void __map_groups__insert(struct map_groups *mg, struct map *map) +{ + __maps__insert(&mg->maps[map->type], map); + map->groups = mg; +} + static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) { struct rb_root *root; @@ -682,9 +688,10 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp } before->end = map->start; - __maps__insert(maps, before); + __map_groups__insert(pos->groups, before); if (verbose >= 2) map__fprintf(before, fp); + map__put(before); } if (map->end < pos->end) { @@ -696,9 +703,10 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp } after->start = map->end; - __maps__insert(maps, after); + __map_groups__insert(pos->groups, after); if (verbose >= 2) map__fprintf(after, fp); + map__put(after); } put_map: map__put(pos); @@ -736,6 +744,7 @@ int map_groups__clone(struct map_groups *mg, if (new == NULL) goto out_unlock; map_groups__insert(mg, new); + map__put(new); } err = 0; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c new file mode 100644 index 000000000000..75465f89a413 --- /dev/null +++ b/tools/perf/util/mem-events.c @@ -0,0 +1,255 @@ +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <api/fs/fs.h> +#include "mem-events.h" +#include "debug.h" +#include "symbol.h" + +#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s } + +struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = { + E("ldlat-loads", "cpu/mem-loads,ldlat=30/P", "mem-loads"), + E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"), +}; +#undef E + +#undef E + +char *perf_mem_events__name(int i) +{ + return (char *)perf_mem_events[i].name; +} + +int perf_mem_events__parse(const char *str) +{ + char *tok, *saveptr = NULL; + bool found = false; + char *buf; + int j; + + /* We need buffer that we know we can write to. */ + buf = malloc(strlen(str) + 1); + if (!buf) + return -ENOMEM; + + strcpy(buf, str); + + tok = strtok_r((char *)buf, ",", &saveptr); + + while (tok) { + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + struct perf_mem_event *e = &perf_mem_events[j]; + + if (strstr(e->tag, tok)) + e->record = found = true; + } + + tok = strtok_r(NULL, ",", &saveptr); + } + + free(buf); + + if (found) + return 0; + + pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str); + return -1; +} + +int perf_mem_events__init(void) +{ + const char *mnt = sysfs__mount(); + bool found = false; + int j; + + if (!mnt) + return -ENOENT; + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + char path[PATH_MAX]; + struct perf_mem_event *e = &perf_mem_events[j]; + struct stat st; + + scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s", + mnt, e->sysfs_name); + + if (!stat(path, &st)) + e->supported = found = true; + } + + return found ? 0 : -ENOENT; +} + +static const char * const tlb_access[] = { + "N/A", + "HIT", + "MISS", + "L1", + "L2", + "Walker", + "Fault", +}; + +int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t l = 0, i; + u64 m = PERF_MEM_TLB_NA; + u64 hit, miss; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + m = mem_info->data_src.mem_dtlb; + + hit = m & PERF_MEM_TLB_HIT; + miss = m & PERF_MEM_TLB_MISS; + + /* already taken care of */ + m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); + + for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, tlb_access[i]); + } + if (*out == '\0') + l += scnprintf(out, sz - l, "N/A"); + if (hit) + l += scnprintf(out + l, sz - l, " hit"); + if (miss) + l += scnprintf(out + l, sz - l, " miss"); + + return l; +} + +static const char * const mem_lvl[] = { + "N/A", + "HIT", + "MISS", + "L1", + "LFB", + "L2", + "L3", + "Local RAM", + "Remote RAM (1 hop)", + "Remote RAM (2 hops)", + "Remote Cache (1 hop)", + "Remote Cache (2 hops)", + "I/O", + "Uncached", +}; + +int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t i, l = 0; + u64 m = PERF_MEM_LVL_NA; + u64 hit, miss; + + if (mem_info) + m = mem_info->data_src.mem_lvl; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + hit = m & PERF_MEM_LVL_HIT; + miss = m & PERF_MEM_LVL_MISS; + + /* already taken care of */ + m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); + + for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, mem_lvl[i]); + } + if (*out == '\0') + l += scnprintf(out, sz - l, "N/A"); + if (hit) + l += scnprintf(out + l, sz - l, " hit"); + if (miss) + l += scnprintf(out + l, sz - l, " miss"); + + return l; +} + +static const char * const snoop_access[] = { + "N/A", + "None", + "Miss", + "Hit", + "HitM", +}; + +int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + size_t i, l = 0; + u64 m = PERF_MEM_SNOOP_NA; + + sz -= 1; /* -1 for null termination */ + out[0] = '\0'; + + if (mem_info) + m = mem_info->data_src.mem_snoop; + + for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) { + if (!(m & 0x1)) + continue; + if (l) { + strcat(out, " or "); + l += 4; + } + l += scnprintf(out + l, sz - l, snoop_access[i]); + } + + if (*out == '\0') + l += scnprintf(out, sz - l, "N/A"); + + return l; +} + +int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + u64 mask = PERF_MEM_LOCK_NA; + int l; + + if (mem_info) + mask = mem_info->data_src.mem_lock; + + if (mask & PERF_MEM_LOCK_NA) + l = scnprintf(out, sz, "N/A"); + else if (mask & PERF_MEM_LOCK_LOCKED) + l = scnprintf(out, sz, "Yes"); + else + l = scnprintf(out, sz, "No"); + + return l; +} + +int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) +{ + int i = 0; + + i += perf_mem__lvl_scnprintf(out, sz, mem_info); + i += scnprintf(out + i, sz - i, "|SNP "); + i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|TLB "); + i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); + i += scnprintf(out + i, sz - i, "|LCK "); + i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); + + return i; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h new file mode 100644 index 000000000000..5d6d93066a6e --- /dev/null +++ b/tools/perf/util/mem-events.h @@ -0,0 +1,35 @@ +#ifndef __PERF_MEM_EVENTS_H +#define __PERF_MEM_EVENTS_H + +#include <stdbool.h> + +struct perf_mem_event { + bool record; + bool supported; + const char *tag; + const char *name; + const char *sysfs_name; +}; + +enum { + PERF_MEM_EVENTS__LOAD, + PERF_MEM_EVENTS__STORE, + PERF_MEM_EVENTS__MAX, +}; + +extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX]; + +int perf_mem_events__parse(const char *str); +int perf_mem_events__init(void); + +char *perf_mem_events__name(int i); + +struct mem_info; +int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); +int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); + +int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); + +#endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c deleted file mode 100644 index 53ef006a951c..000000000000 --- a/tools/perf/util/pager.c +++ /dev/null @@ -1,95 +0,0 @@ -#include "cache.h" -#include "run-command.h" -#include "sigchain.h" - -/* - * This is split up from the rest of git so that we can do - * something different on Windows. - */ - -static int spawned_pager; - -static void pager_preexec(void) -{ - /* - * Work around bug in "less" by not starting it until we - * have real input - */ - fd_set in; - - FD_ZERO(&in); - FD_SET(0, &in); - select(1, &in, NULL, &in, NULL); - - setenv("LESS", "FRSX", 0); -} - -static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; -static struct child_process pager_process; - -static void wait_for_pager(void) -{ - fflush(stdout); - fflush(stderr); - /* signal EOF to pager */ - close(1); - close(2); - finish_command(&pager_process); -} - -static void wait_for_pager_signal(int signo) -{ - wait_for_pager(); - sigchain_pop(signo); - raise(signo); -} - -void setup_pager(void) -{ - const char *pager = getenv("PERF_PAGER"); - - if (!isatty(1)) - return; - if (!pager) - pager = getenv("PAGER"); - if (!(pager || access("/usr/bin/pager", X_OK))) - pager = "/usr/bin/pager"; - if (!(pager || access("/usr/bin/less", X_OK))) - pager = "/usr/bin/less"; - if (!pager) - pager = "cat"; - if (!*pager || !strcmp(pager, "cat")) - return; - - spawned_pager = 1; /* means we are emitting to terminal */ - - /* spawn the pager */ - pager_argv[2] = pager; - pager_process.argv = pager_argv; - pager_process.in = -1; - pager_process.preexec_cb = pager_preexec; - - if (start_command(&pager_process)) - return; - - /* original process continues, but writes to the pipe */ - dup2(pager_process.in, 1); - if (isatty(2)) - dup2(pager_process.in, 2); - close(pager_process.in); - - /* this makes sure that the parent terminates after the pager */ - sigchain_push_common(wait_for_pager_signal); - atexit(wait_for_pager); -} - -int pager_in_use(void) -{ - const char *env; - - if (spawned_pager) - return 1; - - env = getenv("PERF_PAGER_IN_USE"); - return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0; -} diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c index 355eecf6bf59..afc088dd7d20 100644 --- a/tools/perf/util/parse-branch-options.c +++ b/tools/perf/util/parse-branch-options.c @@ -1,7 +1,7 @@ #include "perf.h" #include "util/util.h" #include "util/debug.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-branch-options.h" #define BRANCH_OPT(n, m) \ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index bee60583839a..4c19d5e79d8c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -4,9 +4,9 @@ #include "../perf.h" #include "evlist.h" #include "evsel.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include "parse-events.h" -#include "exec_cmd.h" +#include <subcmd/exec-cmd.h> #include "string.h" #include "symbol.h" #include "cache.h" @@ -124,6 +124,10 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { .symbol = "dummy", .alias = "", }, + [PERF_COUNT_SW_BPF_OUTPUT] = { + .symbol = "bpf-output", + .alias = "", + }, }; #define __PERF_EVENT_FIELD(config, name) \ @@ -275,7 +279,24 @@ const char *event_type(int type) return "unknown"; } +static int parse_events__is_name_term(struct parse_events_term *term) +{ + return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; +} + +static char *get_config_name(struct list_head *head_terms) +{ + struct parse_events_term *term; + + if (!head_terms) + return NULL; + list_for_each_entry(term, head_terms, list) + if (parse_events__is_name_term(term)) + return term->val.str; + + return NULL; +} static struct perf_evsel * __add_event(struct list_head *list, int *idx, @@ -329,11 +350,25 @@ static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES] return -1; } +typedef int config_term_func_t(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); +static int config_term_common(struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err); +static int config_attr(struct perf_event_attr *attr, + struct list_head *head, + struct parse_events_error *err, + config_term_func_t config_term); + int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2) + char *type, char *op_result1, char *op_result2, + struct parse_events_error *err, + struct list_head *head_config) { struct perf_event_attr attr; - char name[MAX_NAME_LEN]; + LIST_HEAD(config_terms); + char name[MAX_NAME_LEN], *config_name; int cache_type = -1, cache_op = -1, cache_result = -1; char *op_result[2] = { op_result1, op_result2 }; int i, n; @@ -347,6 +382,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, if (cache_type == -1) return -EINVAL; + config_name = get_config_name(head_config); n = snprintf(name, MAX_NAME_LEN, "%s", type); for (i = 0; (i < 2) && (op_result[i]); i++) { @@ -387,7 +423,16 @@ int parse_events_add_cache(struct list_head *list, int *idx, memset(&attr, 0, sizeof(attr)); attr.config = cache_type | (cache_op << 8) | (cache_result << 16); attr.type = PERF_TYPE_HW_CACHE; - return add_event(list, idx, &attr, name, NULL); + + if (head_config) { + if (config_attr(&attr, head_config, err, + config_term_common)) + return -EINVAL; + + if (get_config_terms(head_config, &config_terms)) + return -ENOMEM; + } + return add_event(list, idx, &attr, config_name ? : name, &config_terms); } static void tracepoint_error(struct parse_events_error *e, int err, @@ -395,6 +440,9 @@ static void tracepoint_error(struct parse_events_error *e, int err, { char help[BUFSIZ]; + if (!e) + return; + /* * We get error directly from syscall errno ( > 0), * or from encoded pointer's error ( < 0). @@ -533,6 +581,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, struct __add_bpf_event_param { struct parse_events_evlist *data; struct list_head *list; + struct list_head *head_config; }; static int add_bpf_event(struct probe_trace_event *tev, int fd, @@ -549,7 +598,8 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, tev->group, tev->event, fd); err = parse_events_add_tracepoint(&new_evsels, &evlist->idx, tev->group, - tev->event, evlist->error, NULL); + tev->event, evlist->error, + param->head_config); if (err) { struct perf_evsel *evsel, *tmp; @@ -574,11 +624,12 @@ static int add_bpf_event(struct probe_trace_event *tev, int fd, int parse_events_load_bpf_obj(struct parse_events_evlist *data, struct list_head *list, - struct bpf_object *obj) + struct bpf_object *obj, + struct list_head *head_config) { int err; char errbuf[BUFSIZ]; - struct __add_bpf_event_param param = {data, list}; + struct __add_bpf_event_param param = {data, list, head_config}; static bool registered_unprobe_atexit = false; if (IS_ERR(obj) || !obj) { @@ -624,34 +675,128 @@ errout: return err; } +static int +parse_events_config_bpf(struct parse_events_evlist *data, + struct bpf_object *obj, + struct list_head *head_config) +{ + struct parse_events_term *term; + int error_pos; + + if (!head_config || list_empty(head_config)) + return 0; + + list_for_each_entry(term, head_config, list) { + char errbuf[BUFSIZ]; + int err; + + if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) { + snprintf(errbuf, sizeof(errbuf), + "Invalid config term for BPF object"); + errbuf[BUFSIZ - 1] = '\0'; + + data->error->idx = term->err_term; + data->error->str = strdup(errbuf); + return -EINVAL; + } + + err = bpf__config_obj(obj, term, data->evlist, &error_pos); + if (err) { + bpf__strerror_config_obj(obj, term, data->evlist, + &error_pos, err, errbuf, + sizeof(errbuf)); + data->error->help = strdup( +"Hint:\tValid config terms:\n" +" \tmap:[<arraymap>].value<indices>=[value]\n" +" \tmap:[<eventmap>].event<indices>=[event]\n" +"\n" +" \twhere <indices> is something like [0,3...5] or [all]\n" +" \t(add -v to see detail)"); + data->error->str = strdup(errbuf); + if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE) + data->error->idx = term->err_val; + else + data->error->idx = term->err_term + error_pos; + return err; + } + } + return 0; +} + +/* + * Split config terms: + * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ... + * 'call-graph=fp' is 'evt config', should be applied to each + * events in bpf.c. + * 'map:array.value[0]=1' is 'obj config', should be processed + * with parse_events_config_bpf. + * + * Move object config terms from the first list to obj_head_config. + */ +static void +split_bpf_config_terms(struct list_head *evt_head_config, + struct list_head *obj_head_config) +{ + struct parse_events_term *term, *temp; + + /* + * Currectly, all possible user config term + * belong to bpf object. parse_events__is_hardcoded_term() + * happends to be a good flag. + * + * See parse_events_config_bpf() and + * config_term_tracepoint(). + */ + list_for_each_entry_safe(term, temp, evt_head_config, list) + if (!parse_events__is_hardcoded_term(term)) + list_move_tail(&term->list, obj_head_config); +} + int parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, - bool source) + bool source, + struct list_head *head_config) { + int err; struct bpf_object *obj; + LIST_HEAD(obj_head_config); + + if (head_config) + split_bpf_config_terms(head_config, &obj_head_config); obj = bpf__prepare_load(bpf_file_name, source); - if (IS_ERR(obj) || !obj) { + if (IS_ERR(obj)) { char errbuf[BUFSIZ]; - int err; - err = obj ? PTR_ERR(obj) : -EINVAL; + err = PTR_ERR(obj); if (err == -ENOTSUP) snprintf(errbuf, sizeof(errbuf), "BPF support is not compiled"); else - snprintf(errbuf, sizeof(errbuf), - "BPF object file '%s' is invalid", - bpf_file_name); + bpf__strerror_prepare_load(bpf_file_name, + source, + -err, errbuf, + sizeof(errbuf)); data->error->help = strdup("(add -v to see detail)"); data->error->str = strdup(errbuf); return err; } - return parse_events_load_bpf_obj(data, list, obj); + err = parse_events_load_bpf_obj(data, list, obj, head_config); + if (err) + return err; + err = parse_events_config_bpf(data, obj, &obj_head_config); + + /* + * Caller doesn't know anything about obj_head_config, + * so combine them together again before returnning. + */ + if (head_config) + list_splice_tail(&obj_head_config, head_config); + return err; } static int @@ -738,9 +883,59 @@ static int check_type_val(struct parse_events_term *term, return -EINVAL; } -typedef int config_term_func_t(struct perf_event_attr *attr, - struct parse_events_term *term, - struct parse_events_error *err); +/* + * Update according to parse-events.l + */ +static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { + [PARSE_EVENTS__TERM_TYPE_USER] = "<sysfs term>", + [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config", + [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", + [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", + [PARSE_EVENTS__TERM_TYPE_NAME] = "name", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", + [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", + [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type", + [PARSE_EVENTS__TERM_TYPE_TIME] = "time", + [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph", + [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size", + [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", + [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", +}; + +static bool config_term_shrinked; + +static bool +config_term_avail(int term_type, struct parse_events_error *err) +{ + if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) { + err->str = strdup("Invalid term_type"); + return false; + } + if (!config_term_shrinked) + return true; + + switch (term_type) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + case PARSE_EVENTS__TERM_TYPE_NAME: + return true; + default: + if (!err) + return false; + + /* term_type is validated so indexing is safe */ + if (asprintf(&err->str, "'%s' is not usable in 'perf stat'", + config_term_names[term_type]) < 0) + err->str = NULL; + return false; + } +} + +void parse_events__shrink_config_terms(void) +{ + config_term_shrinked = true; +} static int config_term_common(struct perf_event_attr *attr, struct parse_events_term *term, @@ -807,6 +1002,17 @@ do { \ return -EINVAL; } + /* + * Check term availbility after basic checking so + * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered. + * + * If check availbility at the entry of this function, + * user will see "'<sysfs term>' is not usable in 'perf stat'" + * if an invalid config term is provided for legacy events + * (for example, instructions/badterm/...), which is confusing. + */ + if (!config_term_avail(term->type_term, err)) + return -EINVAL; return 0; #undef CHECK_TYPE_VAL } @@ -953,23 +1159,8 @@ int parse_events_add_numeric(struct parse_events_evlist *data, return -ENOMEM; } - return add_event(list, &data->idx, &attr, NULL, &config_terms); -} - -static int parse_events__is_name_term(struct parse_events_term *term) -{ - return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME; -} - -static char *pmu_event_name(struct list_head *head_terms) -{ - struct parse_events_term *term; - - list_for_each_entry(term, head_terms, list) - if (parse_events__is_name_term(term)) - return term->val.str; - - return NULL; + return add_event(list, &data->idx, &attr, + get_config_name(head_config), &config_terms); } int parse_events_add_pmu(struct parse_events_evlist *data, @@ -1016,7 +1207,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, return -EINVAL; evsel = __add_event(list, &data->idx, &attr, - pmu_event_name(head_config), pmu->cpus, + get_config_name(head_config), pmu->cpus, &config_terms); if (evsel) { evsel->unit = info.unit; @@ -1378,8 +1569,7 @@ int parse_events_terms(struct list_head *terms, const char *str) return 0; } - if (data.terms) - parse_events__free_terms(data.terms); + parse_events_terms__delete(data.terms); return ret; } @@ -1387,9 +1577,10 @@ int parse_events(struct perf_evlist *evlist, const char *str, struct parse_events_error *err) { struct parse_events_evlist data = { - .list = LIST_HEAD_INIT(data.list), - .idx = evlist->nr_entries, - .error = err, + .list = LIST_HEAD_INIT(data.list), + .idx = evlist->nr_entries, + .error = err, + .evlist = evlist, }; int ret; @@ -1878,7 +2069,7 @@ restart: for (i = 0; i < max; i++, syms++) { - if (event_glob != NULL && + if (event_glob != NULL && syms->symbol != NULL && !(strglobmatch(syms->symbol, event_glob) || (syms->alias && strglobmatch(syms->alias, event_glob)))) continue; @@ -2060,12 +2251,29 @@ int parse_events_term__clone(struct parse_events_term **new, term->err_term, term->err_val); } -void parse_events__free_terms(struct list_head *terms) +void parse_events_terms__purge(struct list_head *terms) { struct parse_events_term *term, *h; - list_for_each_entry_safe(term, h, terms, list) + list_for_each_entry_safe(term, h, terms, list) { + if (term->array.nr_ranges) + free(term->array.ranges); + list_del_init(&term->list); free(term); + } +} + +void parse_events_terms__delete(struct list_head *terms) +{ + if (!terms) + return; + parse_events_terms__purge(terms); + free(terms); +} + +void parse_events__clear_array(struct parse_events_array *a) +{ + free(a->ranges); } void parse_events_evlist_error(struct parse_events_evlist *data, @@ -2080,6 +2288,33 @@ void parse_events_evlist_error(struct parse_events_evlist *data, WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); } +static void config_terms_list(char *buf, size_t buf_sz) +{ + int i; + bool first = true; + + buf[0] = '\0'; + for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { + const char *name = config_term_names[i]; + + if (!config_term_avail(i, NULL)) + continue; + if (!name) + continue; + if (name[0] == '<') + continue; + + if (strlen(buf) + strlen(name) + 2 >= buf_sz) + return; + + if (!first) + strcat(buf, ","); + else + first = false; + strcat(buf, name); + } +} + /* * Return string contains valid config terms of an event. * @additional_terms: For terms such as PMU sysfs terms. @@ -2087,17 +2322,18 @@ void parse_events_evlist_error(struct parse_events_evlist *data, char *parse_events_formats_error_string(char *additional_terms) { char *str; - static const char *static_terms = "config,config1,config2,name," - "period,freq,branch_type,time," - "call-graph,stack-size\n"; + /* "branch_type" is the longest name */ + char static_terms[__PARSE_EVENTS__TERM_TYPE_NR * + (sizeof("branch_type") - 1)]; + config_terms_list(static_terms, sizeof(static_terms)); /* valid terms */ if (additional_terms) { - if (!asprintf(&str, "valid terms: %s,%s", - additional_terms, static_terms)) + if (asprintf(&str, "valid terms: %s,%s", + additional_terms, static_terms) < 0) goto fail; } else { - if (!asprintf(&str, "valid terms: %s", static_terms)) + if (asprintf(&str, "valid terms: %s", static_terms) < 0) goto fail; } return str; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index f1a6db107241..67e493088e81 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -68,11 +68,21 @@ enum { PARSE_EVENTS__TERM_TYPE_CALLGRAPH, PARSE_EVENTS__TERM_TYPE_STACKSIZE, PARSE_EVENTS__TERM_TYPE_NOINHERIT, - PARSE_EVENTS__TERM_TYPE_INHERIT + PARSE_EVENTS__TERM_TYPE_INHERIT, + __PARSE_EVENTS__TERM_TYPE_NR, +}; + +struct parse_events_array { + size_t nr_ranges; + struct { + unsigned int start; + size_t length; + } *ranges; }; struct parse_events_term { char *config; + struct parse_events_array array; union { char *str; u64 num; @@ -98,12 +108,14 @@ struct parse_events_evlist { int idx; int nr_groups; struct parse_events_error *error; + struct perf_evlist *evlist; }; struct parse_events_terms { struct list_head *terms; }; +void parse_events__shrink_config_terms(void); int parse_events__is_hardcoded_term(struct parse_events_term *term); int parse_events_term__num(struct parse_events_term **term, int type_term, char *config, u64 num, @@ -115,7 +127,9 @@ int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx); int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term); -void parse_events__free_terms(struct list_head *terms); +void parse_events_terms__delete(struct list_head *terms); +void parse_events_terms__purge(struct list_head *terms); +void parse_events__clear_array(struct parse_events_array *a); int parse_events__modifier_event(struct list_head *list, char *str, bool add); int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); @@ -126,18 +140,22 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx, int parse_events_load_bpf(struct parse_events_evlist *data, struct list_head *list, char *bpf_file_name, - bool source); + bool source, + struct list_head *head_config); /* Provide this function for perf test */ struct bpf_object; int parse_events_load_bpf_obj(struct parse_events_evlist *data, struct list_head *list, - struct bpf_object *obj); + struct bpf_object *obj, + struct list_head *head_config); int parse_events_add_numeric(struct parse_events_evlist *data, struct list_head *list, u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head *list, int *idx, - char *type, char *op_result1, char *op_result2); + char *type, char *op_result1, char *op_result2, + struct parse_events_error *error, + struct list_head *head_config); int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); int parse_events_add_pmu(struct parse_events_evlist *data, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 58c5831ffd5c..1477fbc78993 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -9,8 +9,8 @@ %{ #include <errno.h> #include "../perf.h" -#include "parse-events-bison.h" #include "parse-events.h" +#include "parse-events-bison.h" char *parse_events_get_text(yyscan_t yyscanner); YYSTYPE *parse_events_get_lval(yyscan_t yyscanner); @@ -111,6 +111,7 @@ do { \ %x mem %s config %x event +%x array group [^,{}/]*[{][^}]*[}][^,{}/]* event_pmu [^,{}/]+[/][^/]*[/][^,{}/]* @@ -122,7 +123,7 @@ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?][a-zA-Z0-9_*?.]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* /* If you add a modifier you need to update check_modifier() */ modifier_event [ukhpPGHSDI]+ modifier_bp [rwx]{1,3} @@ -176,10 +177,17 @@ modifier_bp [rwx]{1,3} } +<array>{ +"]" { BEGIN(config); return ']'; } +{num_dec} { return value(yyscanner, 10); } +{num_hex} { return value(yyscanner, 16); } +, { return ','; } +"\.\.\." { return PE_ARRAY_RANGE; } +} + <config>{ /* - * Please update parse_events_formats_error_string any time - * new static term is added. + * Please update config_term_names when new static term is added. */ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } @@ -196,6 +204,8 @@ no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } +\[all\] { return PE_ARRAY_ALL; } +"[" { BEGIN(array); return '['; } } <mem>{ @@ -238,6 +248,7 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } +bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } /* * We have to handle the kernel PMU event cycles-ct/cycles-t/mem-loads/mem-stores separately. diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index ad379968d4c1..5be4a5f216d6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -28,7 +28,7 @@ do { \ INIT_LIST_HEAD(list); \ } while (0) -static inc_group_count(struct list_head *list, +static void inc_group_count(struct list_head *list, struct parse_events_evlist *data) { /* Count groups only have more than 1 members */ @@ -48,6 +48,7 @@ static inc_group_count(struct list_head *list, %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR %token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT +%token PE_ARRAY_ALL PE_ARRAY_RANGE %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW @@ -64,6 +65,7 @@ static inc_group_count(struct list_head *list, %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT %type <num> value_sym %type <head> event_config +%type <head> opt_event_config %type <term> event_term %type <head> event_pmu %type <head> event_legacy_symbol @@ -82,6 +84,9 @@ static inc_group_count(struct list_head *list, %type <head> group_def %type <head> group %type <head> groups +%type <array> array +%type <array> array_term +%type <array> array_terms %union { @@ -93,6 +98,7 @@ static inc_group_count(struct list_head *list, char *sys; char *event; } tracepoint_name; + struct parse_events_array array; } %% @@ -211,24 +217,14 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME '/' event_config '/' +PE_NAME opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); - parse_events__free_terms($3); - $$ = list; -} -| -PE_NAME '/' '/' -{ - struct parse_events_evlist *data = _data; - struct list_head *list; - - ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(data, list, $1, NULL)); + ABORT_ON(parse_events_add_pmu(data, list, $1, $2)); + parse_events_terms__delete($2); $$ = list; } | @@ -246,7 +242,7 @@ PE_KERNEL_PMU_EVENT sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } | @@ -266,7 +262,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc ALLOC_LIST(list); ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); - parse_events__free_terms(head); + parse_events_terms__delete(head); $$ = list; } @@ -285,7 +281,7 @@ value_sym '/' event_config '/' ALLOC_LIST(list); ABORT_ON(parse_events_add_numeric(data, list, type, config, $3)); - parse_events__free_terms($3); + parse_events_terms__delete($3); $$ = list; } | @@ -302,33 +298,39 @@ value_sym sep_slash_dc } event_legacy_cache: -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, $5, error, $6)); + parse_events_terms__delete($6); $$ = list; } | -PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT +PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, $3, NULL, error, $4)); + parse_events_terms__delete($4); $$ = list; } | -PE_NAME_CACHE_TYPE +PE_NAME_CACHE_TYPE opt_event_config { struct parse_events_evlist *data = _data; + struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL)); + ABORT_ON(parse_events_add_cache(list, &data->idx, $1, NULL, NULL, error, $2)); + parse_events_terms__delete($2); $$ = list; } @@ -378,24 +380,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc } event_legacy_tracepoint: -tracepoint_name -{ - struct parse_events_evlist *data = _data; - struct parse_events_error *error = data->error; - struct list_head *list; - - ALLOC_LIST(list); - if (error) - error->idx = @1.first_column; - - if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error, NULL)) - return -1; - - $$ = list; -} -| -tracepoint_name '/' event_config '/' +tracepoint_name opt_event_config { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; @@ -406,7 +391,7 @@ tracepoint_name '/' event_config '/' error->idx = @1.first_column; if (parse_events_add_tracepoint(list, &data->idx, $1.sys, $1.event, - error, $3)) + error, $2)) return -1; $$ = list; @@ -433,49 +418,68 @@ PE_NAME ':' PE_NAME } event_legacy_numeric: -PE_VALUE ':' PE_VALUE +PE_VALUE ':' PE_VALUE opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, $4)); + parse_events_terms__delete($4); $$ = list; } event_legacy_raw: -PE_RAW +PE_RAW opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, $2)); + parse_events_terms__delete($2); $$ = list; } event_bpf_file: -PE_BPF_OBJECT +PE_BPF_OBJECT opt_event_config { struct parse_events_evlist *data = _data; struct parse_events_error *error = data->error; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, false)); + ABORT_ON(parse_events_load_bpf(data, list, $1, false, $2)); + parse_events_terms__delete($2); $$ = list; } | -PE_BPF_SOURCE +PE_BPF_SOURCE opt_event_config { struct parse_events_evlist *data = _data; struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_load_bpf(data, list, $1, true)); + ABORT_ON(parse_events_load_bpf(data, list, $1, true, $2)); + parse_events_terms__delete($2); $$ = list; } +opt_event_config: +'/' event_config '/' +{ + $$ = $2; +} +| +'/' '/' +{ + $$ = NULL; +} +| +{ + $$ = NULL; +} + start_terms: event_config { struct parse_events_terms *data = _data; @@ -573,6 +577,86 @@ PE_TERM ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL)); $$ = term; } +| +PE_NAME array '=' PE_NAME +{ + struct parse_events_term *term; + int i; + + ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $4, &@1, &@4)); + + term->array = $2; + $$ = term; +} +| +PE_NAME array '=' PE_VALUE +{ + struct parse_events_term *term; + + ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, + $1, $4, &@1, &@4)); + term->array = $2; + $$ = term; +} + +array: +'[' array_terms ']' +{ + $$ = $2; +} +| +PE_ARRAY_ALL +{ + $$.nr_ranges = 0; + $$.ranges = NULL; +} + +array_terms: +array_terms ',' array_term +{ + struct parse_events_array new_array; + + new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges; + new_array.ranges = malloc(sizeof(new_array.ranges[0]) * + new_array.nr_ranges); + ABORT_ON(!new_array.ranges); + memcpy(&new_array.ranges[0], $1.ranges, + $1.nr_ranges * sizeof(new_array.ranges[0])); + memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges, + $3.nr_ranges * sizeof(new_array.ranges[0])); + free($1.ranges); + free($3.ranges); + $$ = new_array; +} +| +array_term + +array_term: +PE_VALUE +{ + struct parse_events_array array; + + array.nr_ranges = 1; + array.ranges = malloc(sizeof(array.ranges[0])); + ABORT_ON(!array.ranges); + array.ranges[0].start = $1; + array.ranges[0].length = 1; + $$ = array; +} +| +PE_VALUE PE_ARRAY_RANGE PE_VALUE +{ + struct parse_events_array array; + + ABORT_ON($3 < $1); + array.nr_ranges = 1; + array.ranges = malloc(sizeof(array.ranges[0])); + ABORT_ON(!array.ranges); + array.ranges[0].start = $1; + array.ranges[0].length = $3 - $1 + 1; + $$ = array; +} sep_dc: ':' | diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c deleted file mode 100644 index 9fca09296eb3..000000000000 --- a/tools/perf/util/parse-options.c +++ /dev/null @@ -1,867 +0,0 @@ -#include "util.h" -#include "parse-options.h" -#include "cache.h" -#include "header.h" -#include <linux/string.h> - -#define OPT_SHORT 1 -#define OPT_UNSET 2 - -static struct strbuf error_buf = STRBUF_INIT; - -static int opterror(const struct option *opt, const char *reason, int flags) -{ - if (flags & OPT_SHORT) - return error("switch `%c' %s", opt->short_name, reason); - if (flags & OPT_UNSET) - return error("option `no-%s' %s", opt->long_name, reason); - return error("option `%s' %s", opt->long_name, reason); -} - -static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, - int flags, const char **arg) -{ - if (p->opt) { - *arg = p->opt; - p->opt = NULL; - } else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 || - **(p->argv + 1) == '-')) { - *arg = (const char *)opt->defval; - } else if (p->argc > 1) { - p->argc--; - *arg = *++p->argv; - } else - return opterror(opt, "requires a value", flags); - return 0; -} - -static int get_value(struct parse_opt_ctx_t *p, - const struct option *opt, int flags) -{ - const char *s, *arg = NULL; - const int unset = flags & OPT_UNSET; - int err; - - if (unset && p->opt) - return opterror(opt, "takes no value", flags); - if (unset && (opt->flags & PARSE_OPT_NONEG)) - return opterror(opt, "isn't available", flags); - if (opt->flags & PARSE_OPT_DISABLED) - return opterror(opt, "is not usable", flags); - - if (opt->flags & PARSE_OPT_EXCLUSIVE) { - if (p->excl_opt && p->excl_opt != opt) { - char msg[128]; - - if (((flags & OPT_SHORT) && p->excl_opt->short_name) || - p->excl_opt->long_name == NULL) { - scnprintf(msg, sizeof(msg), "cannot be used with switch `%c'", - p->excl_opt->short_name); - } else { - scnprintf(msg, sizeof(msg), "cannot be used with %s", - p->excl_opt->long_name); - } - opterror(opt, msg, flags); - return -3; - } - p->excl_opt = opt; - } - if (!(flags & OPT_SHORT) && p->opt) { - switch (opt->type) { - case OPTION_CALLBACK: - if (!(opt->flags & PARSE_OPT_NOARG)) - break; - /* FALLTHROUGH */ - case OPTION_BOOLEAN: - case OPTION_INCR: - case OPTION_BIT: - case OPTION_SET_UINT: - case OPTION_SET_PTR: - return opterror(opt, "takes no value", flags); - case OPTION_END: - case OPTION_ARGUMENT: - case OPTION_GROUP: - case OPTION_STRING: - case OPTION_INTEGER: - case OPTION_UINTEGER: - case OPTION_LONG: - case OPTION_U64: - default: - break; - } - } - - switch (opt->type) { - case OPTION_BIT: - if (unset) - *(int *)opt->value &= ~opt->defval; - else - *(int *)opt->value |= opt->defval; - return 0; - - case OPTION_BOOLEAN: - *(bool *)opt->value = unset ? false : true; - if (opt->set) - *(bool *)opt->set = true; - return 0; - - case OPTION_INCR: - *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; - return 0; - - case OPTION_SET_UINT: - *(unsigned int *)opt->value = unset ? 0 : opt->defval; - return 0; - - case OPTION_SET_PTR: - *(void **)opt->value = unset ? NULL : (void *)opt->defval; - return 0; - - case OPTION_STRING: - err = 0; - if (unset) - *(const char **)opt->value = NULL; - else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) - *(const char **)opt->value = (const char *)opt->defval; - else - err = get_arg(p, opt, flags, (const char **)opt->value); - - /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ - if (opt->flags & PARSE_OPT_NOEMPTY) { - const char *val = *(const char **)opt->value; - - if (!val) - return err; - - /* Similar to unset if we are given an empty string. */ - if (val[0] == '\0') { - *(const char **)opt->value = NULL; - return 0; - } - } - - return err; - - case OPTION_CALLBACK: - if (unset) - return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; - if (opt->flags & PARSE_OPT_NOARG) - return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) - return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; - if (get_arg(p, opt, flags, &arg)) - return -1; - return (*opt->callback)(opt, arg, 0) ? (-1) : 0; - - case OPTION_INTEGER: - if (unset) { - *(int *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(int *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(int *)opt->value = strtol(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - case OPTION_UINTEGER: - if (unset) { - *(unsigned int *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(unsigned int *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(unsigned int *)opt->value = strtol(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - case OPTION_LONG: - if (unset) { - *(long *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(long *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(long *)opt->value = strtol(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - case OPTION_U64: - if (unset) { - *(u64 *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(u64 *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(u64 *)opt->value = strtoull(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - case OPTION_END: - case OPTION_ARGUMENT: - case OPTION_GROUP: - default: - die("should not happen, someone must be hit on the forehead"); - } -} - -static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) -{ - for (; options->type != OPTION_END; options++) { - if (options->short_name == *p->opt) { - p->opt = p->opt[1] ? p->opt + 1 : NULL; - return get_value(p, options, OPT_SHORT); - } - } - return -2; -} - -static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, - const struct option *options) -{ - const char *arg_end = strchr(arg, '='); - const struct option *abbrev_option = NULL, *ambiguous_option = NULL; - int abbrev_flags = 0, ambiguous_flags = 0; - - if (!arg_end) - arg_end = arg + strlen(arg); - - for (; options->type != OPTION_END; options++) { - const char *rest; - int flags = 0; - - if (!options->long_name) - continue; - - rest = skip_prefix(arg, options->long_name); - if (options->type == OPTION_ARGUMENT) { - if (!rest) - continue; - if (*rest == '=') - return opterror(options, "takes no value", flags); - if (*rest) - continue; - p->out[p->cpidx++] = arg - 2; - return 0; - } - if (!rest) { - if (!prefixcmp(options->long_name, "no-")) { - /* - * The long name itself starts with "no-", so - * accept the option without "no-" so that users - * do not have to enter "no-no-" to get the - * negation. - */ - rest = skip_prefix(arg, options->long_name + 3); - if (rest) { - flags |= OPT_UNSET; - goto match; - } - /* Abbreviated case */ - if (!prefixcmp(options->long_name + 3, arg)) { - flags |= OPT_UNSET; - goto is_abbreviated; - } - } - /* abbreviated? */ - if (!strncmp(options->long_name, arg, arg_end - arg)) { -is_abbreviated: - if (abbrev_option) { - /* - * If this is abbreviated, it is - * ambiguous. So when there is no - * exact match later, we need to - * error out. - */ - ambiguous_option = abbrev_option; - ambiguous_flags = abbrev_flags; - } - if (!(flags & OPT_UNSET) && *arg_end) - p->opt = arg_end + 1; - abbrev_option = options; - abbrev_flags = flags; - continue; - } - /* negated and abbreviated very much? */ - if (!prefixcmp("no-", arg)) { - flags |= OPT_UNSET; - goto is_abbreviated; - } - /* negated? */ - if (strncmp(arg, "no-", 3)) - continue; - flags |= OPT_UNSET; - rest = skip_prefix(arg + 3, options->long_name); - /* abbreviated and negated? */ - if (!rest && !prefixcmp(options->long_name, arg + 3)) - goto is_abbreviated; - if (!rest) - continue; - } -match: - if (*rest) { - if (*rest != '=') - continue; - p->opt = rest + 1; - } - return get_value(p, options, flags); - } - - if (ambiguous_option) - return error("Ambiguous option: %s " - "(could be --%s%s or --%s%s)", - arg, - (ambiguous_flags & OPT_UNSET) ? "no-" : "", - ambiguous_option->long_name, - (abbrev_flags & OPT_UNSET) ? "no-" : "", - abbrev_option->long_name); - if (abbrev_option) - return get_value(p, abbrev_option, abbrev_flags); - return -2; -} - -static void check_typos(const char *arg, const struct option *options) -{ - if (strlen(arg) < 3) - return; - - if (!prefixcmp(arg, "no-")) { - error ("did you mean `--%s` (with two dashes ?)", arg); - exit(129); - } - - for (; options->type != OPTION_END; options++) { - if (!options->long_name) - continue; - if (!prefixcmp(options->long_name, arg)) { - error ("did you mean `--%s` (with two dashes ?)", arg); - exit(129); - } - } -} - -void parse_options_start(struct parse_opt_ctx_t *ctx, - int argc, const char **argv, int flags) -{ - memset(ctx, 0, sizeof(*ctx)); - ctx->argc = argc - 1; - ctx->argv = argv + 1; - ctx->out = argv; - ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); - ctx->flags = flags; - if ((flags & PARSE_OPT_KEEP_UNKNOWN) && - (flags & PARSE_OPT_STOP_AT_NON_OPTION)) - die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); -} - -static int usage_with_options_internal(const char * const *, - const struct option *, int, - struct parse_opt_ctx_t *); - -int parse_options_step(struct parse_opt_ctx_t *ctx, - const struct option *options, - const char * const usagestr[]) -{ - int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); - int excl_short_opt = 1; - const char *arg; - - /* we must reset ->opt, unknown short option leave it dangling */ - ctx->opt = NULL; - - for (; ctx->argc; ctx->argc--, ctx->argv++) { - arg = ctx->argv[0]; - if (*arg != '-' || !arg[1]) { - if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) - break; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - continue; - } - - if (arg[1] != '-') { - ctx->opt = ++arg; - if (internal_help && *ctx->opt == 'h') { - return usage_with_options_internal(usagestr, options, 0, ctx); - } - switch (parse_short_opt(ctx, options)) { - case -1: - return parse_options_usage(usagestr, options, arg, 1); - case -2: - goto unknown; - case -3: - goto exclusive; - default: - break; - } - if (ctx->opt) - check_typos(arg, options); - while (ctx->opt) { - if (internal_help && *ctx->opt == 'h') - return usage_with_options_internal(usagestr, options, 0, ctx); - arg = ctx->opt; - switch (parse_short_opt(ctx, options)) { - case -1: - return parse_options_usage(usagestr, options, arg, 1); - case -2: - /* fake a short option thing to hide the fact that we may have - * started to parse aggregated stuff - * - * This is leaky, too bad. - */ - ctx->argv[0] = strdup(ctx->opt - 1); - *(char *)ctx->argv[0] = '-'; - goto unknown; - case -3: - goto exclusive; - default: - break; - } - } - continue; - } - - if (!arg[2]) { /* "--" */ - if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { - ctx->argc--; - ctx->argv++; - } - break; - } - - arg += 2; - if (internal_help && !strcmp(arg, "help-all")) - return usage_with_options_internal(usagestr, options, 1, ctx); - if (internal_help && !strcmp(arg, "help")) - return usage_with_options_internal(usagestr, options, 0, ctx); - if (!strcmp(arg, "list-opts")) - return PARSE_OPT_LIST_OPTS; - if (!strcmp(arg, "list-cmds")) - return PARSE_OPT_LIST_SUBCMDS; - switch (parse_long_opt(ctx, arg, options)) { - case -1: - return parse_options_usage(usagestr, options, arg, 0); - case -2: - goto unknown; - case -3: - excl_short_opt = 0; - goto exclusive; - default: - break; - } - continue; -unknown: - if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) - return PARSE_OPT_UNKNOWN; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - ctx->opt = NULL; - } - return PARSE_OPT_DONE; - -exclusive: - parse_options_usage(usagestr, options, arg, excl_short_opt); - if ((excl_short_opt && ctx->excl_opt->short_name) || - ctx->excl_opt->long_name == NULL) { - char opt = ctx->excl_opt->short_name; - parse_options_usage(NULL, options, &opt, 1); - } else { - parse_options_usage(NULL, options, ctx->excl_opt->long_name, 0); - } - return PARSE_OPT_HELP; -} - -int parse_options_end(struct parse_opt_ctx_t *ctx) -{ - memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); - ctx->out[ctx->cpidx + ctx->argc] = NULL; - return ctx->cpidx + ctx->argc; -} - -int parse_options_subcommand(int argc, const char **argv, const struct option *options, - const char *const subcommands[], const char *usagestr[], int flags) -{ - struct parse_opt_ctx_t ctx; - - perf_env__set_cmdline(&perf_env, argc, argv); - - /* build usage string if it's not provided */ - if (subcommands && !usagestr[0]) { - struct strbuf buf = STRBUF_INIT; - - strbuf_addf(&buf, "perf %s [<options>] {", argv[0]); - for (int i = 0; subcommands[i]; i++) { - if (i) - strbuf_addstr(&buf, "|"); - strbuf_addstr(&buf, subcommands[i]); - } - strbuf_addstr(&buf, "}"); - - usagestr[0] = strdup(buf.buf); - strbuf_release(&buf); - } - - parse_options_start(&ctx, argc, argv, flags); - switch (parse_options_step(&ctx, options, usagestr)) { - case PARSE_OPT_HELP: - exit(129); - case PARSE_OPT_DONE: - break; - case PARSE_OPT_LIST_OPTS: - while (options->type != OPTION_END) { - if (options->long_name) - printf("--%s ", options->long_name); - options++; - } - putchar('\n'); - exit(130); - case PARSE_OPT_LIST_SUBCMDS: - if (subcommands) { - for (int i = 0; subcommands[i]; i++) - printf("%s ", subcommands[i]); - } - putchar('\n'); - exit(130); - default: /* PARSE_OPT_UNKNOWN */ - if (ctx.argv[0][1] == '-') { - strbuf_addf(&error_buf, "unknown option `%s'", - ctx.argv[0] + 2); - } else { - strbuf_addf(&error_buf, "unknown switch `%c'", - *ctx.opt); - } - usage_with_options(usagestr, options); - } - - return parse_options_end(&ctx); -} - -int parse_options(int argc, const char **argv, const struct option *options, - const char * const usagestr[], int flags) -{ - return parse_options_subcommand(argc, argv, options, NULL, - (const char **) usagestr, flags); -} - -#define USAGE_OPTS_WIDTH 24 -#define USAGE_GAP 2 - -static void print_option_help(const struct option *opts, int full) -{ - size_t pos; - int pad; - - if (opts->type == OPTION_GROUP) { - fputc('\n', stderr); - if (*opts->help) - fprintf(stderr, "%s\n", opts->help); - return; - } - if (!full && (opts->flags & PARSE_OPT_HIDDEN)) - return; - if (opts->flags & PARSE_OPT_DISABLED) - return; - - pos = fprintf(stderr, " "); - if (opts->short_name) - pos += fprintf(stderr, "-%c", opts->short_name); - else - pos += fprintf(stderr, " "); - - if (opts->long_name && opts->short_name) - pos += fprintf(stderr, ", "); - if (opts->long_name) - pos += fprintf(stderr, "--%s", opts->long_name); - - switch (opts->type) { - case OPTION_ARGUMENT: - break; - case OPTION_LONG: - case OPTION_U64: - case OPTION_INTEGER: - case OPTION_UINTEGER: - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=<n>]"); - else - pos += fprintf(stderr, "[<n>]"); - else - pos += fprintf(stderr, " <n>"); - break; - case OPTION_CALLBACK: - if (opts->flags & PARSE_OPT_NOARG) - break; - /* FALLTHROUGH */ - case OPTION_STRING: - if (opts->argh) { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=<%s>]", opts->argh); - else - pos += fprintf(stderr, "[<%s>]", opts->argh); - else - pos += fprintf(stderr, " <%s>", opts->argh); - } else { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=...]"); - else - pos += fprintf(stderr, "[...]"); - else - pos += fprintf(stderr, " ..."); - } - break; - default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */ - case OPTION_END: - case OPTION_GROUP: - case OPTION_BIT: - case OPTION_BOOLEAN: - case OPTION_INCR: - case OPTION_SET_UINT: - case OPTION_SET_PTR: - break; - } - - if (pos <= USAGE_OPTS_WIDTH) - pad = USAGE_OPTS_WIDTH - pos; - else { - fputc('\n', stderr); - pad = USAGE_OPTS_WIDTH; - } - fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); -} - -static int option__cmp(const void *va, const void *vb) -{ - const struct option *a = va, *b = vb; - int sa = tolower(a->short_name), sb = tolower(b->short_name), ret; - - if (sa == 0) - sa = 'z' + 1; - if (sb == 0) - sb = 'z' + 1; - - ret = sa - sb; - - if (ret == 0) { - const char *la = a->long_name ?: "", - *lb = b->long_name ?: ""; - ret = strcmp(la, lb); - } - - return ret; -} - -static struct option *options__order(const struct option *opts) -{ - int nr_opts = 0; - const struct option *o = opts; - struct option *ordered; - - for (o = opts; o->type != OPTION_END; o++) - ++nr_opts; - - ordered = memdup(opts, sizeof(*o) * (nr_opts + 1)); - if (ordered == NULL) - goto out; - - qsort(ordered, nr_opts, sizeof(*o), option__cmp); -out: - return ordered; -} - -static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx_t *ctx) -{ - int i; - - for (i = 1; i < ctx->argc; ++i) { - const char *arg = ctx->argv[i]; - - if (arg[0] != '-') { - if (arg[1] == '\0') { - if (arg[0] == opt->short_name) - return true; - continue; - } - - if (opt->long_name && strcmp(opt->long_name, arg) == 0) - return true; - - if (opt->help && strcasestr(opt->help, arg) != NULL) - return true; - - continue; - } - - if (arg[1] == opt->short_name || - (arg[1] == '-' && opt->long_name && strcmp(opt->long_name, arg + 2) == 0)) - return true; - } - - return false; -} - -int usage_with_options_internal(const char * const *usagestr, - const struct option *opts, int full, - struct parse_opt_ctx_t *ctx) -{ - struct option *ordered; - - if (!usagestr) - return PARSE_OPT_HELP; - - setup_pager(); - - if (strbuf_avail(&error_buf)) { - fprintf(stderr, " Error: %s\n", error_buf.buf); - strbuf_release(&error_buf); - } - - fprintf(stderr, "\n Usage: %s\n", *usagestr++); - while (*usagestr && **usagestr) - fprintf(stderr, " or: %s\n", *usagestr++); - while (*usagestr) { - fprintf(stderr, "%s%s\n", - **usagestr ? " " : "", - *usagestr); - usagestr++; - } - - if (opts->type != OPTION_GROUP) - fputc('\n', stderr); - - ordered = options__order(opts); - if (ordered) - opts = ordered; - - for ( ; opts->type != OPTION_END; opts++) { - if (ctx && ctx->argc > 1 && !option__in_argv(opts, ctx)) - continue; - print_option_help(opts, full); - } - - fputc('\n', stderr); - - free(ordered); - - return PARSE_OPT_HELP; -} - -void usage_with_options(const char * const *usagestr, - const struct option *opts) -{ - exit_browser(false); - usage_with_options_internal(usagestr, opts, 0, NULL); - exit(129); -} - -void usage_with_options_msg(const char * const *usagestr, - const struct option *opts, const char *fmt, ...) -{ - va_list ap; - - exit_browser(false); - - va_start(ap, fmt); - strbuf_addv(&error_buf, fmt, ap); - va_end(ap); - - usage_with_options_internal(usagestr, opts, 0, NULL); - exit(129); -} - -int parse_options_usage(const char * const *usagestr, - const struct option *opts, - const char *optstr, bool short_opt) -{ - if (!usagestr) - goto opt; - - fprintf(stderr, "\n Usage: %s\n", *usagestr++); - while (*usagestr && **usagestr) - fprintf(stderr, " or: %s\n", *usagestr++); - while (*usagestr) { - fprintf(stderr, "%s%s\n", - **usagestr ? " " : "", - *usagestr); - usagestr++; - } - fputc('\n', stderr); - -opt: - for ( ; opts->type != OPTION_END; opts++) { - if (short_opt) { - if (opts->short_name == *optstr) { - print_option_help(opts, 0); - break; - } - continue; - } - - if (opts->long_name == NULL) - continue; - - if (!prefixcmp(opts->long_name, optstr)) - print_option_help(opts, 0); - if (!prefixcmp("no-", optstr) && - !prefixcmp(opts->long_name, optstr + 3)) - print_option_help(opts, 0); - } - - return PARSE_OPT_HELP; -} - - -int parse_opt_verbosity_cb(const struct option *opt, - const char *arg __maybe_unused, - int unset) -{ - int *target = opt->value; - - if (unset) - /* --no-quiet, --no-verbose */ - *target = 0; - else if (opt->short_name == 'v') { - if (*target >= 0) - (*target)++; - else - *target = 1; - } else { - if (*target <= 0) - (*target)--; - else - *target = -1; - } - return 0; -} - -void set_option_flag(struct option *opts, int shortopt, const char *longopt, - int flag) -{ - for (; opts->type != OPTION_END; opts++) { - if ((shortopt && opts->short_name == shortopt) || - (opts->long_name && longopt && - !strcmp(opts->long_name, longopt))) { - opts->flags |= flag; - break; - } - } -} diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h deleted file mode 100644 index a8e407bc251e..000000000000 --- a/tools/perf/util/parse-options.h +++ /dev/null @@ -1,229 +0,0 @@ -#ifndef __PERF_PARSE_OPTIONS_H -#define __PERF_PARSE_OPTIONS_H - -#include <linux/kernel.h> -#include <stdbool.h> - -enum parse_opt_type { - /* special types */ - OPTION_END, - OPTION_ARGUMENT, - OPTION_GROUP, - /* options with no arguments */ - OPTION_BIT, - OPTION_BOOLEAN, - OPTION_INCR, - OPTION_SET_UINT, - OPTION_SET_PTR, - /* options with arguments (usually) */ - OPTION_STRING, - OPTION_INTEGER, - OPTION_LONG, - OPTION_CALLBACK, - OPTION_U64, - OPTION_UINTEGER, -}; - -enum parse_opt_flags { - PARSE_OPT_KEEP_DASHDASH = 1, - PARSE_OPT_STOP_AT_NON_OPTION = 2, - PARSE_OPT_KEEP_ARGV0 = 4, - PARSE_OPT_KEEP_UNKNOWN = 8, - PARSE_OPT_NO_INTERNAL_HELP = 16, -}; - -enum parse_opt_option_flags { - PARSE_OPT_OPTARG = 1, - PARSE_OPT_NOARG = 2, - PARSE_OPT_NONEG = 4, - PARSE_OPT_HIDDEN = 8, - PARSE_OPT_LASTARG_DEFAULT = 16, - PARSE_OPT_DISABLED = 32, - PARSE_OPT_EXCLUSIVE = 64, - PARSE_OPT_NOEMPTY = 128, -}; - -struct option; -typedef int parse_opt_cb(const struct option *, const char *arg, int unset); - -/* - * `type`:: - * holds the type of the option, you must have an OPTION_END last in your - * array. - * - * `short_name`:: - * the character to use as a short option name, '\0' if none. - * - * `long_name`:: - * the long option name, without the leading dashes, NULL if none. - * - * `value`:: - * stores pointers to the values to be filled. - * - * `argh`:: - * token to explain the kind of argument this option wants. Keep it - * homogenous across the repository. - * - * `help`:: - * the short help associated to what the option does. - * Must never be NULL (except for OPTION_END). - * OPTION_GROUP uses this pointer to store the group header. - * - * `flags`:: - * mask of parse_opt_option_flags. - * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) - * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs - * PARSE_OPT_NONEG: says that this option cannot be negated - * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in - * the long one. - * - * `callback`:: - * pointer to the callback to use for OPTION_CALLBACK. - * - * `defval`:: - * default value to fill (*->value) with for PARSE_OPT_OPTARG. - * OPTION_{BIT,SET_UINT,SET_PTR} store the {mask,integer,pointer} to put in - * the value when met. - * CALLBACKS can use it like they want. - * - * `set`:: - * whether an option was set by the user - */ -struct option { - enum parse_opt_type type; - int short_name; - const char *long_name; - void *value; - const char *argh; - const char *help; - - int flags; - parse_opt_cb *callback; - intptr_t defval; - bool *set; - void *data; -}; - -#define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v ) - -#define OPT_END() { .type = OPTION_END } -#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) } -#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) } -#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) } -#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) } -#define OPT_BOOLEAN_FLAG(s, l, v, h, f) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h), .flags = (f) } -#define OPT_BOOLEAN_SET(s, l, v, os, h) \ - { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \ - .value = check_vtype(v, bool *), .help = (h), \ - .set = check_vtype(os, bool *)} -#define OPT_INCR(s, l, v, h) { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } -#define OPT_SET_UINT(s, l, v, h, i) { .type = OPTION_SET_UINT, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .defval = (i) } -#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) } -#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } -#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) } -#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } -#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } -#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) } -#define OPT_STRING_OPTARG(s, l, v, a, h, d) \ - { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ - .value = check_vtype(v, const char **), (a), .help = (h), \ - .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } -#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} -#define OPT_DATE(s, l, v, h) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } -#define OPT_CALLBACK(s, l, v, a, h, f) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f) } -#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG } -#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT } -#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\ - .value = (v), (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\ - .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG} -#define OPT_CALLBACK_OPTARG(s, l, v, d, a, h, f) \ - { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), \ - .value = (v), (a), .help = (h), .callback = (f), \ - .flags = PARSE_OPT_OPTARG, .data = (d) } - -/* parse_options() will filter out the processed options and leave the - * non-option argments in argv[]. - * Returns the number of arguments left in argv[]. - */ -extern int parse_options(int argc, const char **argv, - const struct option *options, - const char * const usagestr[], int flags); - -extern int parse_options_subcommand(int argc, const char **argv, - const struct option *options, - const char *const subcommands[], - const char *usagestr[], int flags); - -extern NORETURN void usage_with_options(const char * const *usagestr, - const struct option *options); -extern NORETURN __attribute__((format(printf,3,4))) -void usage_with_options_msg(const char * const *usagestr, - const struct option *options, - const char *fmt, ...); - -/*----- incremantal advanced APIs -----*/ - -enum { - PARSE_OPT_HELP = -1, - PARSE_OPT_DONE, - PARSE_OPT_LIST_OPTS, - PARSE_OPT_LIST_SUBCMDS, - PARSE_OPT_UNKNOWN, -}; - -/* - * It's okay for the caller to consume argv/argc in the usual way. - * Other fields of that structure are private to parse-options and should not - * be modified in any way. - */ -struct parse_opt_ctx_t { - const char **argv; - const char **out; - int argc, cpidx; - const char *opt; - const struct option *excl_opt; - int flags; -}; - -extern int parse_options_usage(const char * const *usagestr, - const struct option *opts, - const char *optstr, - bool short_opt); - -extern void parse_options_start(struct parse_opt_ctx_t *ctx, - int argc, const char **argv, int flags); - -extern int parse_options_step(struct parse_opt_ctx_t *ctx, - const struct option *options, - const char * const usagestr[]); - -extern int parse_options_end(struct parse_opt_ctx_t *ctx); - - -/*----- some often used options -----*/ -extern int parse_opt_abbrev_cb(const struct option *, const char *, int); -extern int parse_opt_approxidate_cb(const struct option *, const char *, int); -extern int parse_opt_verbosity_cb(const struct option *, const char *, int); - -#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") -#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") -#define OPT__VERBOSITY(var) \ - { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ - PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ - { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ - PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } -#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") -#define OPT__ABBREV(var) \ - { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ - "use <n> digits to display SHA-1s", \ - PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } - -extern const char *parse_options_fix_filename(const char *prefix, const char *file); - -void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag); -#endif /* __PERF_PARSE_OPTIONS_H */ diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index 4f2c1c255d81..646ecf736aad 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -1,7 +1,7 @@ #include "perf.h" #include "util/util.h" #include "util/debug.h" -#include "util/parse-options.h" +#include <subcmd/parse-options.h> #include "util/parse-regs-options.h" int diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 5d13cb45b317..3654d964e49d 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -22,24 +22,6 @@ static const char *get_perf_dir(void) return "."; } -/* - * If libc has strlcpy() then that version will override this - * implementation: - */ -size_t __weak strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - - memcpy(dest, src, len); - dest[len] = '\0'; - } - - return ret; -} - static char *get_pathname(void) { static char pathname_array[4][PATH_MAX]; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index e4b173dec4b9..adef23b1352e 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -98,7 +98,7 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * char scale[128]; int fd, ret = -1; char path[PATH_MAX]; - const char *lc; + char *lc; snprintf(path, PATH_MAX, "%s/%s.scale", dir, name); @@ -124,6 +124,17 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * lc = setlocale(LC_NUMERIC, NULL); /* + * The lc string may be allocated in static storage, + * so get a dynamic copy to make it survive setlocale + * call below. + */ + lc = strdup(lc); + if (!lc) { + ret = -ENOMEM; + goto error; + } + + /* * force to C locale to ensure kernel * scale string is converted correctly. * kernel uses default C locale. @@ -135,6 +146,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * /* restore locale */ setlocale(LC_NUMERIC, lc); + free(lc); + ret = 0; error: close(fd); @@ -153,7 +166,7 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n if (fd == -1) return -1; - sret = read(fd, alias->unit, UNIT_MAX_LEN); + sret = read(fd, alias->unit, UNIT_MAX_LEN); if (sret < 0) goto error; @@ -220,6 +233,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias->scale = 1.0; alias->unit[0] = '\0'; alias->per_pkg = false; + alias->snapshot = false; ret = parse_events_terms(&alias->terms, val); if (ret) { @@ -283,13 +297,12 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) { struct dirent *evt_ent; DIR *event_dir; - int ret = 0; event_dir = opendir(dir); if (!event_dir) return -EINVAL; - while (!ret && (evt_ent = readdir(event_dir))) { + while ((evt_ent = readdir(event_dir))) { char path[PATH_MAX]; char *name = evt_ent->d_name; FILE *file; @@ -305,17 +318,19 @@ static int pmu_aliases_parse(char *dir, struct list_head *head) snprintf(path, PATH_MAX, "%s/%s", dir, name); - ret = -EINVAL; file = fopen(path, "r"); - if (!file) - break; + if (!file) { + pr_debug("Cannot open %s\n", path); + continue; + } - ret = perf_pmu__new_alias(head, dir, name, file); + if (perf_pmu__new_alias(head, dir, name, file) < 0) + pr_debug("Cannot set up %s\n", name); fclose(file); } closedir(event_dir); - return ret; + return 0; } /* @@ -353,7 +368,7 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, list_for_each_entry(term, &alias->terms, list) { ret = parse_events_term__clone(&cloned, term); if (ret) { - parse_events__free_terms(&list); + parse_events_terms__purge(&list); return ret; } list_add_tail(&cloned->list, &list); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b51a8bfb40f9..93996ec4bbe3 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1895,9 +1895,8 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, sym = map__find_symbol(map, addr, NULL); } else { if (tp->symbol && !addr) { - ret = kernel_get_symbol_address_by_name(tp->symbol, - &addr, true, false); - if (ret < 0) + if (kernel_get_symbol_address_by_name(tp->symbol, + &addr, true, false) < 0) goto out; } if (addr) { @@ -1905,6 +1904,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, sym = __find_kernel_function(addr, &map); } } + if (!sym) goto out; @@ -2326,8 +2326,11 @@ static int get_new_event_name(char *buf, size_t len, const char *base, goto out; if (!allow_suffix) { - pr_warning("Error: event \"%s\" already exists. " - "(Use -f to force duplicates.)\n", buf); + pr_warning("Error: event \"%s\" already exists.\n" + " Hint: Remove existing event by 'perf probe -d'\n" + " or force duplicates by 'perf probe -f'\n" + " or set 'force=yes' in BPF source.\n", + buf); ret = -EEXIST; goto out; } diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 89dbeb92c68e..e3b3b92e4458 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -138,6 +138,9 @@ struct strlist *probe_file__get_rawlist(int fd) char *p; struct strlist *sl; + if (fd < 0) + return NULL; + sl = strlist__new(NULL, NULL); fp = fdopen(dup(fd), "r"); @@ -271,6 +274,9 @@ int probe_file__get_events(int fd, struct strfilter *filter, const char *p; int ret = -ENOENT; + if (!plist) + return -EINVAL; + namelist = __probe_file__get_namelist(fd, true); if (!namelist) return -ENOENT; diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index bd8f03de5e40..4ce5c5e18f48 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -654,6 +654,7 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) { Dwarf_Attribute fb_attr; + Dwarf_Frame *frame = NULL; size_t nops; int ret; @@ -685,12 +686,13 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) pf->fb_ops = NULL; #if _ELFUTILS_PREREQ(0, 142) } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && - pf->cfi != NULL) { - Dwarf_Frame *frame; - if (dwarf_cfi_addrframe(pf->cfi, pf->addr, &frame) != 0 || + (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { + if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && + (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) || dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) { pr_warning("Failed to get call frame on 0x%jx\n", (uintmax_t)pf->addr); + free(frame); return -ENOENT; } #endif @@ -699,7 +701,8 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) /* Call finder's callback handler */ ret = pf->callback(sc_die, pf); - /* *pf->fb_ops will be cached in libdw. Don't free it. */ + /* Since *pf->fb_ops can be a part of frame. we should free it here. */ + free(frame); pf->fb_ops = NULL; return ret; @@ -1013,8 +1016,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) return DWARF_CB_OK; } -/* Find probe points from debuginfo */ -static int debuginfo__find_probes(struct debuginfo *dbg, +static int debuginfo__find_probe_location(struct debuginfo *dbg, struct probe_finder *pf) { struct perf_probe_point *pp = &pf->pev->point; @@ -1023,27 +1025,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg, Dwarf_Die *diep; int ret = 0; -#if _ELFUTILS_PREREQ(0, 142) - Elf *elf; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - - /* Get the call frame information from this dwarf */ - elf = dwarf_getelf(dbg->dbg); - if (elf == NULL) - return -EINVAL; - - if (gelf_getehdr(elf, &ehdr) == NULL) - return -EINVAL; - - if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && - shdr.sh_type == SHT_PROGBITS) { - pf->cfi = dwarf_getcfi_elf(elf); - } else { - pf->cfi = dwarf_getcfi(dbg->dbg); - } -#endif - off = 0; pf->lcache = intlist__new(NULL); if (!pf->lcache) @@ -1106,6 +1087,39 @@ found: return ret; } +/* Find probe points from debuginfo */ +static int debuginfo__find_probes(struct debuginfo *dbg, + struct probe_finder *pf) +{ + int ret = 0; + +#if _ELFUTILS_PREREQ(0, 142) + Elf *elf; + GElf_Ehdr ehdr; + GElf_Shdr shdr; + + if (pf->cfi_eh || pf->cfi_dbg) + return debuginfo__find_probe_location(dbg, pf); + + /* Get the call frame information from this dwarf */ + elf = dwarf_getelf(dbg->dbg); + if (elf == NULL) + return -EINVAL; + + if (gelf_getehdr(elf, &ehdr) == NULL) + return -EINVAL; + + if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) && + shdr.sh_type == SHT_PROGBITS) + pf->cfi_eh = dwarf_getcfi_elf(elf); + + pf->cfi_dbg = dwarf_getcfi(dbg->dbg); +#endif + + ret = debuginfo__find_probe_location(dbg, pf); + return ret; +} + struct local_vars_finder { struct probe_finder *pf; struct perf_probe_arg *args; @@ -1183,7 +1197,7 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) container_of(pf, struct trace_event_finder, pf); struct perf_probe_point *pp = &pf->pev->point; struct probe_trace_event *tev; - struct perf_probe_arg *args; + struct perf_probe_arg *args = NULL; int ret, i; /* Check number of tevs */ @@ -1198,19 +1212,23 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr, pp->retprobe, pp->function, &tev->point); if (ret < 0) - return ret; + goto end; tev->point.realname = strdup(dwarf_diename(sc_die)); - if (!tev->point.realname) - return -ENOMEM; + if (!tev->point.realname) { + ret = -ENOMEM; + goto end; + } pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, tev->point.offset); /* Expand special probe argument if exist */ args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS); - if (args == NULL) - return -ENOMEM; + if (args == NULL) { + ret = -ENOMEM; + goto end; + } ret = expand_probe_args(sc_die, pf, args); if (ret < 0) @@ -1234,6 +1252,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) } end: + if (ret) { + clear_probe_trace_event(tev); + tf->ntevs--; + } free(args); return ret; } @@ -1246,7 +1268,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, struct trace_event_finder tf = { .pf = {.pev = pev, .callback = add_probe_trace_event}, .max_tevs = probe_conf.max_probes, .mod = dbg->mod}; - int ret; + int ret, i; /* Allocate result tevs array */ *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs); @@ -1258,6 +1280,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, ret = debuginfo__find_probes(dbg, &tf.pf); if (ret < 0) { + for (i = 0; i < tf.ntevs; i++) + clear_probe_trace_event(&tf.tevs[i]); zfree(tevs); return ret; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index bed82716e1b4..0aec7704e395 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -76,7 +76,10 @@ struct probe_finder { /* For variable searching */ #if _ELFUTILS_PREREQ(0, 142) - Dwarf_CFI *cfi; /* Call Frame Information */ + /* Call Frame Information from .eh_frame */ + Dwarf_CFI *cfi_eh; + /* Call Frame Information from .debug_frame */ + Dwarf_CFI *cfi_dbg; #endif Dwarf_Op *fb_ops; /* Frame base attribute */ struct perf_probe_arg *pvar; /* Current target variable */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 51be28b1bca2..8162ba0e2e57 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,8 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +../lib/bitmap.c +../lib/find_bit.c ../lib/hweight.c util/thread_map.c util/util.c diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c deleted file mode 100644 index 34622b53e733..000000000000 --- a/tools/perf/util/run-command.c +++ /dev/null @@ -1,219 +0,0 @@ -#include "cache.h" -#include "run-command.h" -#include "exec_cmd.h" -#include "debug.h" - -static inline void close_pair(int fd[2]) -{ - close(fd[0]); - close(fd[1]); -} - -static inline void dup_devnull(int to) -{ - int fd = open("/dev/null", O_RDWR); - dup2(fd, to); - close(fd); -} - -int start_command(struct child_process *cmd) -{ - int need_in, need_out, need_err; - int fdin[2], fdout[2], fderr[2]; - char sbuf[STRERR_BUFSIZE]; - - /* - * In case of errors we must keep the promise to close FDs - * that have been passed in via ->in and ->out. - */ - - need_in = !cmd->no_stdin && cmd->in < 0; - if (need_in) { - if (pipe(fdin) < 0) { - if (cmd->out > 0) - close(cmd->out); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->in = fdin[1]; - } - - need_out = !cmd->no_stdout - && !cmd->stdout_to_stderr - && cmd->out < 0; - if (need_out) { - if (pipe(fdout) < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->out = fdout[0]; - } - - need_err = !cmd->no_stderr && cmd->err < 0; - if (need_err) { - if (pipe(fderr) < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->err = fderr[0]; - } - - fflush(NULL); - cmd->pid = fork(); - if (!cmd->pid) { - if (cmd->no_stdin) - dup_devnull(0); - else if (need_in) { - dup2(fdin[0], 0); - close_pair(fdin); - } else if (cmd->in) { - dup2(cmd->in, 0); - close(cmd->in); - } - - if (cmd->no_stderr) - dup_devnull(2); - else if (need_err) { - dup2(fderr[1], 2); - close_pair(fderr); - } - - if (cmd->no_stdout) - dup_devnull(1); - else if (cmd->stdout_to_stderr) - dup2(2, 1); - else if (need_out) { - dup2(fdout[1], 1); - close_pair(fdout); - } else if (cmd->out > 1) { - dup2(cmd->out, 1); - close(cmd->out); - } - - if (cmd->dir && chdir(cmd->dir)) - die("exec %s: cd to %s failed (%s)", cmd->argv[0], - cmd->dir, strerror_r(errno, sbuf, sizeof(sbuf))); - if (cmd->env) { - for (; *cmd->env; cmd->env++) { - if (strchr(*cmd->env, '=')) - putenv((char*)*cmd->env); - else - unsetenv(*cmd->env); - } - } - if (cmd->preexec_cb) - cmd->preexec_cb(); - if (cmd->perf_cmd) { - execv_perf_cmd(cmd->argv); - } else { - execvp(cmd->argv[0], (char *const*) cmd->argv); - } - exit(127); - } - - if (cmd->pid < 0) { - int err = errno; - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - if (need_err) - close_pair(fderr); - return err == ENOENT ? - -ERR_RUN_COMMAND_EXEC : - -ERR_RUN_COMMAND_FORK; - } - - if (need_in) - close(fdin[0]); - else if (cmd->in) - close(cmd->in); - - if (need_out) - close(fdout[1]); - else if (cmd->out) - close(cmd->out); - - if (need_err) - close(fderr[1]); - - return 0; -} - -static int wait_or_whine(pid_t pid) -{ - char sbuf[STRERR_BUFSIZE]; - - for (;;) { - int status, code; - pid_t waiting = waitpid(pid, &status, 0); - - if (waiting < 0) { - if (errno == EINTR) - continue; - error("waitpid failed (%s)", - strerror_r(errno, sbuf, sizeof(sbuf))); - return -ERR_RUN_COMMAND_WAITPID; - } - if (waiting != pid) - return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; - if (WIFSIGNALED(status)) - return -ERR_RUN_COMMAND_WAITPID_SIGNAL; - - if (!WIFEXITED(status)) - return -ERR_RUN_COMMAND_WAITPID_NOEXIT; - code = WEXITSTATUS(status); - switch (code) { - case 127: - return -ERR_RUN_COMMAND_EXEC; - case 0: - return 0; - default: - return -code; - } - } -} - -int finish_command(struct child_process *cmd) -{ - return wait_or_whine(cmd->pid); -} - -int run_command(struct child_process *cmd) -{ - int code = start_command(cmd); - if (code) - return code; - return finish_command(cmd); -} - -static void prepare_run_command_v_opt(struct child_process *cmd, - const char **argv, - int opt) -{ - memset(cmd, 0, sizeof(*cmd)); - cmd->argv = argv; - cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; - cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; - cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; -} - -int run_command_v_opt(const char **argv, int opt) -{ - struct child_process cmd; - prepare_run_command_v_opt(&cmd, argv, opt); - return run_command(&cmd); -} diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h deleted file mode 100644 index 1ef264d5069c..000000000000 --- a/tools/perf/util/run-command.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef __PERF_RUN_COMMAND_H -#define __PERF_RUN_COMMAND_H - -enum { - ERR_RUN_COMMAND_FORK = 10000, - ERR_RUN_COMMAND_EXEC, - ERR_RUN_COMMAND_PIPE, - ERR_RUN_COMMAND_WAITPID, - ERR_RUN_COMMAND_WAITPID_WRONG_PID, - ERR_RUN_COMMAND_WAITPID_SIGNAL, - ERR_RUN_COMMAND_WAITPID_NOEXIT, -}; -#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) - -struct child_process { - const char **argv; - pid_t pid; - /* - * Using .in, .out, .err: - * - Specify 0 for no redirections (child inherits stdin, stdout, - * stderr from parent). - * - Specify -1 to have a pipe allocated as follows: - * .in: returns the writable pipe end; parent writes to it, - * the readable pipe end becomes child's stdin - * .out, .err: returns the readable pipe end; parent reads from - * it, the writable pipe end becomes child's stdout/stderr - * The caller of start_command() must close the returned FDs - * after it has completed reading from/writing to it! - * - Specify > 0 to set a channel to a particular FD as follows: - * .in: a readable FD, becomes child's stdin - * .out: a writable FD, becomes child's stdout/stderr - * .err > 0 not supported - * The specified FD is closed by start_command(), even in case - * of errors! - */ - int in; - int out; - int err; - const char *dir; - const char *const *env; - unsigned no_stdin:1; - unsigned no_stdout:1; - unsigned no_stderr:1; - unsigned perf_cmd:1; /* if this is to be perf sub-command */ - unsigned stdout_to_stderr:1; - void (*preexec_cb)(void); -}; - -int start_command(struct child_process *); -int finish_command(struct child_process *); -int run_command(struct child_process *); - -#define RUN_COMMAND_NO_STDIN 1 -#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ -#define RUN_COMMAND_STDOUT_TO_STDERR 4 -int run_command_v_opt(const char **argv, int opt); - -#endif /* __PERF_RUN_COMMAND_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 544509c159ce..b3aabc0d4eb0 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -187,6 +187,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index a8e825fca42a..fbd05242b4e5 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -41,6 +41,9 @@ #include "../thread-stack.h" #include "../trace-event.h" #include "../machine.h" +#include "thread_map.h" +#include "cpumap.h" +#include "stat.h" PyMODINIT_FUNC initperf_trace_context(void); @@ -202,6 +205,9 @@ static void define_event_symbols(struct event_format *event, const char *ev_name, struct print_arg *args) { + if (args == NULL) + return; + switch (args->type) { case PRINT_NULL: break; @@ -859,6 +865,104 @@ static void python_process_event(union perf_event *event, } } +static void get_handler_name(char *str, size_t size, + struct perf_evsel *evsel) +{ + char *p = str; + + scnprintf(str, size, "stat__%s", perf_evsel__name(evsel)); + + while ((p = strchr(p, ':'))) { + *p = '_'; + p++; + } +} + +static void +process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp, + struct perf_counts_values *count) +{ + PyObject *handler, *t; + static char handler_name[256]; + int n = 0; + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + get_handler_name(handler_name, sizeof(handler_name), + counter); + + handler = get_handler(handler_name); + if (!handler) { + pr_debug("can't find python handler %s\n", handler_name); + return; + } + + PyTuple_SetItem(t, n++, PyInt_FromLong(cpu)); + PyTuple_SetItem(t, n++, PyInt_FromLong(thread)); + + tuple_set_u64(t, n++, tstamp); + tuple_set_u64(t, n++, count->val); + tuple_set_u64(t, n++, count->ena); + tuple_set_u64(t, n++, count->run); + + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + call_object(handler, t, handler_name); + + Py_DECREF(t); +} + +static void python_process_stat(struct perf_stat_config *config, + struct perf_evsel *counter, u64 tstamp) +{ + struct thread_map *threads = counter->threads; + struct cpu_map *cpus = counter->cpus; + int cpu, thread; + + if (config->aggr_mode == AGGR_GLOBAL) { + process_stat(counter, -1, -1, tstamp, + &counter->counts->aggr); + return; + } + + for (thread = 0; thread < threads->nr; thread++) { + for (cpu = 0; cpu < cpus->nr; cpu++) { + process_stat(counter, cpus->map[cpu], + thread_map__pid(threads, thread), tstamp, + perf_counts(counter->counts, cpu, thread)); + } + } +} + +static void python_process_stat_interval(u64 tstamp) +{ + PyObject *handler, *t; + static const char handler_name[] = "stat__interval"; + int n = 0; + + t = PyTuple_New(MAX_FIELDS); + if (!t) + Py_FatalError("couldn't create Python tuple"); + + handler = get_handler(handler_name); + if (!handler) { + pr_debug("can't find python handler %s\n", handler_name); + return; + } + + tuple_set_u64(t, n++, tstamp); + + if (_PyTuple_Resize(&t, n) == -1) + Py_FatalError("error resizing Python tuple"); + + call_object(handler, t, handler_name); + + Py_DECREF(t); +} + static int run_start_sub(void) { main_module = PyImport_AddModule("__main__"); @@ -990,8 +1094,6 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } - free(command_line); - set_table_handlers(tables); if (tables->db_export_mode) { @@ -1000,6 +1102,8 @@ static int python_start_script(const char *script, int argc, const char **argv) goto error; } + free(command_line); + return err; error: Py_Finalize(); @@ -1201,10 +1305,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) } struct scripting_ops python_scripting_ops = { - .name = "Python", - .start_script = python_start_script, - .flush_script = python_flush_script, - .stop_script = python_stop_script, - .process_event = python_process_event, - .generate_script = python_generate_script, + .name = "Python", + .start_script = python_start_script, + .flush_script = python_flush_script, + .stop_script = python_stop_script, + .process_event = python_process_event, + .process_stat = python_process_stat, + .process_stat_interval = python_process_stat_interval, + .generate_script = python_generate_script, }; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 428149bc64d2..60b3593d210d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -17,6 +17,7 @@ #include "asm/bug.h" #include "auxtrace.h" #include "thread-stack.h" +#include "stat.h" static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, @@ -29,25 +30,28 @@ static int perf_session__open(struct perf_session *session) struct perf_data_file *file = session->file; if (perf_session__read_header(session) < 0) { - pr_err("incompatible file format (rerun with -v to learn more)"); + pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1; } if (perf_data_file__is_pipe(file)) return 0; + if (perf_header__has_feat(&session->header, HEADER_STAT)) + return 0; + if (!perf_evlist__valid_sample_type(session->evlist)) { - pr_err("non matching sample_type"); + pr_err("non matching sample_type\n"); return -1; } if (!perf_evlist__valid_sample_id_all(session->evlist)) { - pr_err("non matching sample_id_all"); + pr_err("non matching sample_id_all\n"); return -1; } if (!perf_evlist__valid_read_format(session->evlist)) { - pr_err("non matching read_format"); + pr_err("non matching read_format\n"); return -1; } @@ -205,6 +209,18 @@ static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused, return 0; } +static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_evlist **pevlist + __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_event_update(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + static int process_event_sample_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -224,14 +240,6 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_build_id_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe __maybe_unused) @@ -244,23 +252,6 @@ static int process_finished_round(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); -static int process_id_index_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - -static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) -{ - dump_printf(": unhandled!\n"); - return 0; -} - static int skipn(int fd, off_t n) { char buf[4096]; @@ -287,11 +278,71 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, return event->auxtrace.size; } +static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + + static -int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_thread_map(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static +int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_cpu_map(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static +int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_stat_config(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_stat_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *perf_session + __maybe_unused) { + if (dump_trace) + perf_event__fprintf_stat(event, stdout); + + dump_printf(": unhandled!\n"); + return 0; +} + +static int process_stat_round_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *perf_session + __maybe_unused) +{ + if (dump_trace) + perf_event__fprintf_stat_round(event, stdout); + dump_printf(": unhandled!\n"); return 0; } @@ -328,10 +379,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->unthrottle = process_event_stub; if (tool->attr == NULL) tool->attr = process_event_synth_attr_stub; + if (tool->event_update == NULL) + tool->event_update = process_event_synth_event_update_stub; if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_build_id_stub; + tool->build_id = process_event_op2_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -339,13 +392,23 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->finished_round = process_finished_round_stub; } if (tool->id_index == NULL) - tool->id_index = process_id_index_stub; + tool->id_index = process_event_op2_stub; if (tool->auxtrace_info == NULL) - tool->auxtrace_info = process_event_auxtrace_info_stub; + tool->auxtrace_info = process_event_op2_stub; if (tool->auxtrace == NULL) tool->auxtrace = process_event_auxtrace_stub; if (tool->auxtrace_error == NULL) - tool->auxtrace_error = process_event_auxtrace_error_stub; + tool->auxtrace_error = process_event_op2_stub; + if (tool->thread_map == NULL) + tool->thread_map = process_event_thread_map_stub; + if (tool->cpu_map == NULL) + tool->cpu_map = process_event_cpu_map_stub; + if (tool->stat_config == NULL) + tool->stat_config = process_event_stat_config_stub; + if (tool->stat == NULL) + tool->stat = process_stat_stub; + if (tool->stat_round == NULL) + tool->stat_round = process_stat_round_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -569,6 +632,13 @@ static void perf_event__hdr_attr_swap(union perf_event *event, mem_bswap_64(event->attr.id, size); } +static void perf_event__event_update_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->event_update.type = bswap_64(event->event_update.type); + event->event_update.id = bswap_64(event->event_update.id); +} + static void perf_event__event_type_swap(union perf_event *event, bool sample_id_all __maybe_unused) { @@ -616,6 +686,81 @@ static void perf_event__auxtrace_error_swap(union perf_event *event, event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); } +static void perf_event__thread_map_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + unsigned i; + + event->thread_map.nr = bswap_64(event->thread_map.nr); + + for (i = 0; i < event->thread_map.nr; i++) + event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid); +} + +static void perf_event__cpu_map_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + struct cpu_map_data *data = &event->cpu_map.data; + struct cpu_map_entries *cpus; + struct cpu_map_mask *mask; + unsigned i; + + data->type = bswap_64(data->type); + + switch (data->type) { + case PERF_CPU_MAP__CPUS: + cpus = (struct cpu_map_entries *)data->data; + + cpus->nr = bswap_16(cpus->nr); + + for (i = 0; i < cpus->nr; i++) + cpus->cpu[i] = bswap_16(cpus->cpu[i]); + break; + case PERF_CPU_MAP__MASK: + mask = (struct cpu_map_mask *) data->data; + + mask->nr = bswap_16(mask->nr); + mask->long_size = bswap_16(mask->long_size); + + switch (mask->long_size) { + case 4: mem_bswap_32(&mask->mask, mask->nr); break; + case 8: mem_bswap_64(&mask->mask, mask->nr); break; + default: + pr_err("cpu_map swap: unsupported long size\n"); + } + default: + break; + } +} + +static void perf_event__stat_config_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + u64 size; + + size = event->stat_config.nr * sizeof(event->stat_config.data[0]); + size += 1; /* nr item itself */ + mem_bswap_64(&event->stat_config.nr, size); +} + +static void perf_event__stat_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->stat.id = bswap_64(event->stat.id); + event->stat.thread = bswap_32(event->stat.thread); + event->stat.cpu = bswap_32(event->stat.cpu); + event->stat.val = bswap_64(event->stat.val); + event->stat.ena = bswap_64(event->stat.ena); + event->stat.run = bswap_64(event->stat.run); +} + +static void perf_event__stat_round_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->stat_round.type = bswap_64(event->stat_round.type); + event->stat_round.time = bswap_64(event->stat_round.time); +} + typedef void (*perf_event__swap_op)(union perf_event *event, bool sample_id_all); @@ -643,6 +788,12 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap, [PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap, [PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap, + [PERF_RECORD_THREAD_MAP] = perf_event__thread_map_swap, + [PERF_RECORD_CPU_MAP] = perf_event__cpu_map_swap, + [PERF_RECORD_STAT_CONFIG] = perf_event__stat_config_swap, + [PERF_RECORD_STAT] = perf_event__stat_swap, + [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap, + [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -972,7 +1123,7 @@ static struct machine *machines__find_for_cpumode(struct machines *machines, machine = machines__find(machines, pid); if (!machine) - machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); + machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } @@ -1154,6 +1305,8 @@ static s64 perf_session__process_user_event(struct perf_session *session, perf_session__set_comm_exec(session); } return err; + case PERF_RECORD_EVENT_UPDATE: + return tool->event_update(tool, event, &session->evlist); case PERF_RECORD_HEADER_EVENT_TYPE: /* * Depreceated, but we need to handle it for sake @@ -1179,6 +1332,16 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); return tool->auxtrace_error(tool, event, session); + case PERF_RECORD_THREAD_MAP: + return tool->thread_map(tool, event, session); + case PERF_RECORD_CPU_MAP: + return tool->cpu_map(tool, event, session); + case PERF_RECORD_STAT_CONFIG: + return tool->stat_config(tool, event, session); + case PERF_RECORD_STAT: + return tool->stat(tool, event, session); + case PERF_RECORD_STAT_ROUND: + return tool->stat_round(tool, event, session); default: return -EINVAL; } @@ -1311,17 +1474,20 @@ struct thread *perf_session__findnew(struct perf_session *session, pid_t pid) return machine__findnew_thread(&session->machines.host, -1, pid); } -struct thread *perf_session__register_idle_thread(struct perf_session *session) +int perf_session__register_idle_thread(struct perf_session *session) { struct thread *thread; + int err = 0; thread = machine__findnew_thread(&session->machines.host, 0, 0); if (thread == NULL || thread__set_comm(thread, "swapper", 0)) { pr_err("problem inserting idle task.\n"); - thread = NULL; + err = -1; } - return thread; + /* machine__findnew_thread() got the thread, so put it */ + thread__put(thread); + return err; } static void perf_session__warn_about_errors(const struct perf_session *session) @@ -1676,7 +1842,7 @@ int perf_session__process_events(struct perf_session *session) u64 size = perf_data_file__size(session->file); int err; - if (perf_session__register_idle_thread(session) == NULL) + if (perf_session__register_idle_thread(session) < 0) return -ENOMEM; if (!perf_data_file__is_pipe(session->file)) diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 3e900c0efc73..5f792e35d4c1 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -89,7 +89,7 @@ struct machine *perf_session__findnew_machine(struct perf_session *session, pid_ } struct thread *perf_session__findnew(struct perf_session *session, pid_t pid); -struct thread *perf_session__register_idle_thread(struct perf_session *session); +int perf_session__register_idle_thread(struct perf_session *session); size_t perf_session__fprintf(struct perf_session *session, FILE *fp); diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 1833103768cb..c8680984d2d6 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -22,6 +22,7 @@ cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +src_perf = getenv('srctree') + '/tools/perf' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') @@ -30,6 +31,9 @@ libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] +# use full paths with source files +ext_sources = map(lambda x: '%s/%s' % (src_perf, x) , ext_sources) + perf = Extension('perf', sources = ext_sources, include_dirs = ['util/include'], diff --git a/tools/perf/util/sigchain.c b/tools/perf/util/sigchain.c deleted file mode 100644 index ba785e9b1841..000000000000 --- a/tools/perf/util/sigchain.c +++ /dev/null @@ -1,52 +0,0 @@ -#include "sigchain.h" -#include "cache.h" - -#define SIGCHAIN_MAX_SIGNALS 32 - -struct sigchain_signal { - sigchain_fun *old; - int n; - int alloc; -}; -static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS]; - -static void check_signum(int sig) -{ - if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS) - die("BUG: signal out of range: %d", sig); -} - -static int sigchain_push(int sig, sigchain_fun f) -{ - struct sigchain_signal *s = signals + sig; - check_signum(sig); - - ALLOC_GROW(s->old, s->n + 1, s->alloc); - s->old[s->n] = signal(sig, f); - if (s->old[s->n] == SIG_ERR) - return -1; - s->n++; - return 0; -} - -int sigchain_pop(int sig) -{ - struct sigchain_signal *s = signals + sig; - check_signum(sig); - if (s->n < 1) - return 0; - - if (signal(sig, s->old[s->n - 1]) == SIG_ERR) - return -1; - s->n--; - return 0; -} - -void sigchain_push_common(sigchain_fun f) -{ - sigchain_push(SIGINT, f); - sigchain_push(SIGHUP, f); - sigchain_push(SIGTERM, f); - sigchain_push(SIGQUIT, f); - sigchain_push(SIGPIPE, f); -} diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h deleted file mode 100644 index 959d64eb5557..000000000000 --- a/tools/perf/util/sigchain.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __PERF_SIGCHAIN_H -#define __PERF_SIGCHAIN_H - -typedef void (*sigchain_fun)(int); - -int sigchain_pop(int sig); - -void sigchain_push_common(sigchain_fun f); - -#endif /* __PERF_SIGCHAIN_H */ diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 2d8ccd4d9e1b..93fa136b0025 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -4,6 +4,9 @@ #include "comm.h" #include "symbol.h" #include "evsel.h" +#include "evlist.h" +#include <traceevent/event-parse.h> +#include "mem-events.h" regex_t parent_regex; const char default_parent_pattern[] = "^sys_|^do_page_fault"; @@ -13,6 +16,7 @@ const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cy const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; const char default_top_sort_order[] = "dso,symbol"; const char default_diff_sort_order[] = "dso,symbol"; +const char default_tracepoint_sort_order[] = "trace"; const char *sort_order; const char *field_order; regex_t ignore_callees_regex; @@ -22,9 +26,19 @@ int sort__has_parent = 0; int sort__has_sym = 0; int sort__has_dso = 0; int sort__has_socket = 0; +int sort__has_thread = 0; +int sort__has_comm = 0; enum sort_mode sort__mode = SORT_MODE__NORMAL; - +/* + * Replaces all occurrences of a char used with the: + * + * -t, --field-separator + * + * option, that uses a special separator character and don't pad with spaces, + * replacing all occurances of this separator in symbol names (and other + * output) with a '.' character, that thus it's the only non valid separator. +*/ static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...) { int n; @@ -77,10 +91,21 @@ static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf, width, width, comm ?: ""); } +static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg) +{ + const struct thread *th = arg; + + if (type != HIST_FILTER__THREAD) + return -1; + + return th && he->thread != th; +} + struct sort_entry sort_thread = { .se_header = " Pid:Command", .se_cmp = sort__thread_cmp, .se_snprintf = hist_entry__thread_snprintf, + .se_filter = hist_entry__thread_filter, .se_width_idx = HISTC_THREAD, }; @@ -118,6 +143,7 @@ struct sort_entry sort_comm = { .se_collapse = sort__comm_collapse, .se_sort = sort__comm_sort, .se_snprintf = hist_entry__comm_snprintf, + .se_filter = hist_entry__thread_filter, .se_width_idx = HISTC_COMM, }; @@ -167,10 +193,21 @@ static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf, return _hist_entry__dso_snprintf(he->ms.map, bf, size, width); } +static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->ms.map || he->ms.map->dso != dso); +} + struct sort_entry sort_dso = { .se_header = "Shared Object", .se_cmp = sort__dso_cmp, .se_snprintf = hist_entry__dso_snprintf, + .se_filter = hist_entry__dso_filter, .se_width_idx = HISTC_DSO, }; @@ -243,10 +280,8 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", ip - map->unmap_ip(map, sym->start)); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } else { - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", + ret += repsep_snprintf(bf + ret, size - ret, "%.*s", width - ret, sym->name); } @@ -254,14 +289,9 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, size_t len = BITS_PER_LONG / 4; ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, ip); - ret += repsep_snprintf(bf + ret, size - ret, "%-*s", - width - ret, ""); } - if (ret > width) - bf[width] = '\0'; - - return width; + return ret; } static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, @@ -271,46 +301,56 @@ static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf, he->level, bf, size, width); } +static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym)); +} + struct sort_entry sort_sym = { .se_header = "Symbol", .se_cmp = sort__sym_cmp, .se_sort = sort__sym_sort, .se_snprintf = hist_entry__sym_snprintf, + .se_filter = hist_entry__sym_filter, .se_width_idx = HISTC_SYMBOL, }; /* --sort srcline */ +static char *hist_entry__get_srcline(struct hist_entry *he) +{ + struct map *map = he->ms.map; + + if (!map) + return SRCLINE_UNKNOWN; + + return get_srcline(map->dso, map__rip_2objdump(map, he->ip), + he->ms.sym, true); +} + static int64_t sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->srcline) { - if (!left->ms.map) - left->srcline = SRCLINE_UNKNOWN; - else { - struct map *map = left->ms.map; - left->srcline = get_srcline(map->dso, - map__rip_2objdump(map, left->ip), - left->ms.sym, true); - } - } - if (!right->srcline) { - if (!right->ms.map) - right->srcline = SRCLINE_UNKNOWN; - else { - struct map *map = right->ms.map; - right->srcline = get_srcline(map->dso, - map__rip_2objdump(map, right->ip), - right->ms.sym, true); - } - } + if (!left->srcline) + left->srcline = hist_entry__get_srcline(left); + if (!right->srcline) + right->srcline = hist_entry__get_srcline(right); + return strcmp(right->srcline, left->srcline); } static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcline); + if (!he->srcline) + he->srcline = hist_entry__get_srcline(he); + + return repsep_snprintf(bf, size, "%-.*s", width, he->srcline); } struct sort_entry sort_srcline = { @@ -324,11 +364,14 @@ struct sort_entry sort_srcline = { static char no_srcfile[1]; -static char *get_srcfile(struct hist_entry *e) +static char *hist_entry__get_srcfile(struct hist_entry *e) { char *sf, *p; struct map *map = e->ms.map; + if (!map) + return no_srcfile; + sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), e->ms.sym, false, true); if (!strcmp(sf, SRCLINE_UNKNOWN)) @@ -345,25 +388,21 @@ static char *get_srcfile(struct hist_entry *e) static int64_t sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->srcfile) { - if (!left->ms.map) - left->srcfile = no_srcfile; - else - left->srcfile = get_srcfile(left); - } - if (!right->srcfile) { - if (!right->ms.map) - right->srcfile = no_srcfile; - else - right->srcfile = get_srcfile(right); - } + if (!left->srcfile) + left->srcfile = hist_entry__get_srcfile(left); + if (!right->srcfile) + right->srcfile = hist_entry__get_srcfile(right); + return strcmp(right->srcfile, left->srcfile); } static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); + if (!he->srcfile) + he->srcfile = hist_entry__get_srcfile(he); + + return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile); } struct sort_entry sort_srcfile = { @@ -436,13 +475,85 @@ static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket); } +static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg) +{ + int sk = *(const int *)arg; + + if (type != HIST_FILTER__SOCKET) + return -1; + + return sk >= 0 && he->socket != sk; +} + struct sort_entry sort_socket = { .se_header = "Socket", .se_cmp = sort__socket_cmp, .se_snprintf = hist_entry__socket_snprintf, + .se_filter = hist_entry__socket_filter, .se_width_idx = HISTC_SOCKET, }; +/* --sort trace */ + +static char *get_trace_output(struct hist_entry *he) +{ + struct trace_seq seq; + struct perf_evsel *evsel; + struct pevent_record rec = { + .data = he->raw_data, + .size = he->raw_size, + }; + + evsel = hists_to_evsel(he->hists); + + trace_seq_init(&seq); + if (symbol_conf.raw_trace) { + pevent_print_fields(&seq, he->raw_data, he->raw_size, + evsel->tp_format); + } else { + pevent_event_info(&seq, evsel->tp_format, &rec); + } + return seq.buffer; +} + +static int64_t +sort__trace_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct perf_evsel *evsel; + + evsel = hists_to_evsel(left->hists); + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + return 0; + + if (left->trace_output == NULL) + left->trace_output = get_trace_output(left); + if (right->trace_output == NULL) + right->trace_output = get_trace_output(right); + + return strcmp(right->trace_output, left->trace_output); +} + +static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + struct perf_evsel *evsel; + + evsel = hists_to_evsel(he->hists); + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + return scnprintf(bf, size, "%-.*s", width, "N/A"); + + if (he->trace_output == NULL) + he->trace_output = get_trace_output(he); + return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output); +} + +struct sort_entry sort_trace = { + .se_header = "Trace output", + .se_cmp = sort__trace_cmp, + .se_snprintf = hist_entry__trace_snprintf, + .se_width_idx = HISTC_TRACE, +}; + /* sort keys for branch stacks */ static int64_t @@ -465,6 +576,18 @@ static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__dso_from_filter(struct hist_entry *he, int type, + const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->branch_info || !he->branch_info->from.map || + he->branch_info->from.map->dso != dso); +} + static int64_t sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -485,6 +608,18 @@ static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__dso_to_filter(struct hist_entry *he, int type, + const void *arg) +{ + const struct dso *dso = arg; + + if (type != HIST_FILTER__DSO) + return -1; + + return dso && (!he->branch_info || !he->branch_info->to.map || + he->branch_info->to.map->dso != dso); +} + static int64_t sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -546,10 +681,35 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); } +static int hist_entry__sym_from_filter(struct hist_entry *he, int type, + const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && !(he->branch_info && he->branch_info->from.sym && + strstr(he->branch_info->from.sym->name, sym)); +} + +static int hist_entry__sym_to_filter(struct hist_entry *he, int type, + const void *arg) +{ + const char *sym = arg; + + if (type != HIST_FILTER__SYMBOL) + return -1; + + return sym && !(he->branch_info && he->branch_info->to.sym && + strstr(he->branch_info->to.sym->name, sym)); +} + struct sort_entry sort_dso_from = { .se_header = "Source Shared Object", .se_cmp = sort__dso_from_cmp, .se_snprintf = hist_entry__dso_from_snprintf, + .se_filter = hist_entry__dso_from_filter, .se_width_idx = HISTC_DSO_FROM, }; @@ -557,6 +717,7 @@ struct sort_entry sort_dso_to = { .se_header = "Target Shared Object", .se_cmp = sort__dso_to_cmp, .se_snprintf = hist_entry__dso_to_snprintf, + .se_filter = hist_entry__dso_to_filter, .se_width_idx = HISTC_DSO_TO, }; @@ -564,6 +725,7 @@ struct sort_entry sort_sym_from = { .se_header = "Source Symbol", .se_cmp = sort__sym_from_cmp, .se_snprintf = hist_entry__sym_from_snprintf, + .se_filter = hist_entry__sym_from_filter, .se_width_idx = HISTC_SYMBOL_FROM, }; @@ -571,6 +733,7 @@ struct sort_entry sort_sym_to = { .se_header = "Target Symbol", .se_cmp = sort__sym_to_cmp, .se_snprintf = hist_entry__sym_to_snprintf, + .se_filter = hist_entry__sym_to_filter, .se_width_idx = HISTC_SYMBOL_TO, }; @@ -730,20 +893,10 @@ sort__locked_cmp(struct hist_entry *left, struct hist_entry *right) static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { - const char *out; - u64 mask = PERF_MEM_LOCK_NA; + char out[10]; - if (he->mem_info) - mask = he->mem_info->data_src.mem_lock; - - if (mask & PERF_MEM_LOCK_NA) - out = "N/A"; - else if (mask & PERF_MEM_LOCK_LOCKED) - out = "Yes"; - else - out = "No"; - - return repsep_snprintf(bf, size, "%-*s", width, out); + perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info); + return repsep_snprintf(bf, size, "%.*s", width, out); } static int64_t @@ -765,54 +918,12 @@ sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb); } -static const char * const tlb_access[] = { - "N/A", - "HIT", - "MISS", - "L1", - "L2", - "Walker", - "Fault", -}; -#define NUM_TLB_ACCESS (sizeof(tlb_access)/sizeof(const char *)) - static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t l = 0, i; - u64 m = PERF_MEM_TLB_NA; - u64 hit, miss; - - out[0] = '\0'; - - if (he->mem_info) - m = he->mem_info->data_src.mem_dtlb; - - hit = m & PERF_MEM_TLB_HIT; - miss = m & PERF_MEM_TLB_MISS; - - /* already taken care of */ - m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS); - - for (i = 0; m && i < NUM_TLB_ACCESS; i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, tlb_access[i], sz - l); - l += strlen(tlb_access[i]); - } - if (*out == '\0') - strcpy(out, "N/A"); - if (hit) - strncat(out, " hit", sz - l); - if (miss) - strncat(out, " miss", sz - l); + perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } @@ -835,61 +946,12 @@ sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl); } -static const char * const mem_lvl[] = { - "N/A", - "HIT", - "MISS", - "L1", - "LFB", - "L2", - "L3", - "Local RAM", - "Remote RAM (1 hop)", - "Remote RAM (2 hops)", - "Remote Cache (1 hop)", - "Remote Cache (2 hops)", - "I/O", - "Uncached", -}; -#define NUM_MEM_LVL (sizeof(mem_lvl)/sizeof(const char *)) - static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t i, l = 0; - u64 m = PERF_MEM_LVL_NA; - u64 hit, miss; - - if (he->mem_info) - m = he->mem_info->data_src.mem_lvl; - - out[0] = '\0'; - - hit = m & PERF_MEM_LVL_HIT; - miss = m & PERF_MEM_LVL_MISS; - - /* already taken care of */ - m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - - for (i = 0; m && i < NUM_MEM_LVL; i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, mem_lvl[i], sz - l); - l += strlen(mem_lvl[i]); - } - if (*out == '\0') - strcpy(out, "N/A"); - if (hit) - strncat(out, " hit", sz - l); - if (miss) - strncat(out, " miss", sz - l); + perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } @@ -912,51 +974,15 @@ sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right) return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop); } -static const char * const snoop_access[] = { - "N/A", - "None", - "Miss", - "Hit", - "HitM", -}; -#define NUM_SNOOP_ACCESS (sizeof(snoop_access)/sizeof(const char *)) - static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { char out[64]; - size_t sz = sizeof(out) - 1; /* -1 for null termination */ - size_t i, l = 0; - u64 m = PERF_MEM_SNOOP_NA; - - out[0] = '\0'; - - if (he->mem_info) - m = he->mem_info->data_src.mem_snoop; - - for (i = 0; m && i < NUM_SNOOP_ACCESS; i++, m >>= 1) { - if (!(m & 0x1)) - continue; - if (l) { - strcat(out, " or "); - l += 4; - } - strncat(out, snoop_access[i], sz - l); - l += strlen(snoop_access[i]); - } - - if (*out == '\0') - strcpy(out, "N/A"); + perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info); return repsep_snprintf(bf, size, "%-*s", width, out); } -static inline u64 cl_address(u64 address) -{ - /* return the cacheline of the address */ - return (address & ~(cacheline_size - 1)); -} - static int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right) { @@ -1312,6 +1338,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), DIM(SORT_TRANSACTION, "transaction", sort_transaction), + DIM(SORT_TRACE, "trace", sort_trace), }; #undef DIM @@ -1372,20 +1399,6 @@ struct hpp_sort_entry { struct sort_entry *se; }; -bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) -{ - struct hpp_sort_entry *hse_a; - struct hpp_sort_entry *hse_b; - - if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) - return false; - - hse_a = container_of(a, struct hpp_sort_entry, hpp); - hse_b = container_of(b, struct hpp_sort_entry, hpp); - - return hse_a->se == hse_b->se; -} - void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) { struct hpp_sort_entry *hse; @@ -1471,8 +1484,56 @@ static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt, return sort_fn(a, b); } +bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +{ + return format->header == __sort__hpp_header; +} + +#define MK_SORT_ENTRY_CHK(key) \ +bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \ +{ \ + struct hpp_sort_entry *hse; \ + \ + if (!perf_hpp__is_sort_entry(fmt)) \ + return false; \ + \ + hse = container_of(fmt, struct hpp_sort_entry, hpp); \ + return hse->se == &sort_ ## key ; \ +} + +MK_SORT_ENTRY_CHK(trace) +MK_SORT_ENTRY_CHK(srcline) +MK_SORT_ENTRY_CHK(srcfile) +MK_SORT_ENTRY_CHK(thread) +MK_SORT_ENTRY_CHK(comm) +MK_SORT_ENTRY_CHK(dso) +MK_SORT_ENTRY_CHK(sym) + + +static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_sort_entry *hse_a; + struct hpp_sort_entry *hse_b; + + if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b)) + return false; + + hse_a = container_of(a, struct hpp_sort_entry, hpp); + hse_b = container_of(b, struct hpp_sort_entry, hpp); + + return hse_a->se == hse_b->se; +} + +static void hse_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_sort_entry *hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + free(hse); +} + static struct hpp_sort_entry * -__sort_dimension__alloc_hpp(struct sort_dimension *sd) +__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level) { struct hpp_sort_entry *hse; @@ -1492,49 +1553,613 @@ __sort_dimension__alloc_hpp(struct sort_dimension *sd) hse->hpp.cmp = __sort__hpp_cmp; hse->hpp.collapse = __sort__hpp_collapse; hse->hpp.sort = __sort__hpp_sort; + hse->hpp.equal = __sort__hpp_equal; + hse->hpp.free = hse_free; INIT_LIST_HEAD(&hse->hpp.list); INIT_LIST_HEAD(&hse->hpp.sort_list); hse->hpp.elide = false; hse->hpp.len = 0; hse->hpp.user_len = 0; + hse->hpp.level = level; return hse; } -bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format) +static void hpp_free(struct perf_hpp_fmt *fmt) { - return format->header == __sort__hpp_header; + free(fmt); +} + +static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd, + int level) +{ + struct perf_hpp_fmt *fmt; + + fmt = memdup(hd->fmt, sizeof(*fmt)); + if (fmt) { + INIT_LIST_HEAD(&fmt->list); + INIT_LIST_HEAD(&fmt->sort_list); + fmt->free = hpp_free; + fmt->level = level; + } + + return fmt; +} + +int hist_entry__filter(struct hist_entry *he, int type, const void *arg) +{ + struct perf_hpp_fmt *fmt; + struct hpp_sort_entry *hse; + int ret = -1; + int r; + + perf_hpp_list__for_each_format(he->hpp_list, fmt) { + if (!perf_hpp__is_sort_entry(fmt)) + continue; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + if (hse->se->se_filter == NULL) + continue; + + /* + * hist entry is filtered if any of sort key in the hpp list + * is applied. But it should skip non-matched filter types. + */ + r = hse->se->se_filter(he, type, arg); + if (r >= 0) { + if (ret < 0) + ret = 0; + ret |= r; + } + } + + return ret; } -static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; - perf_hpp__register_sort_field(&hse->hpp); + perf_hpp_list__register_sort_field(list, &hse->hpp); return 0; } -static int __sort_dimension__add_hpp_output(struct sort_dimension *sd) +static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, + struct perf_hpp_list *list) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); if (hse == NULL) return -1; - perf_hpp__column_register(&hse->hpp); + perf_hpp_list__column_register(list, &hse->hpp); return 0; } -static int __sort_dimension__add(struct sort_dimension *sd) +struct hpp_dynamic_entry { + struct perf_hpp_fmt hpp; + struct perf_evsel *evsel; + struct format_field *field; + unsigned dynamic_len; + bool raw_trace; +}; + +static int hde_width(struct hpp_dynamic_entry *hde) +{ + if (!hde->hpp.len) { + int len = hde->dynamic_len; + int namelen = strlen(hde->field->name); + int fieldlen = hde->field->size; + + if (namelen > len) + len = namelen; + + if (!(hde->field->flags & FIELD_IS_STRING)) { + /* length for print hex numbers */ + fieldlen = hde->field->size * 2 + 2; + } + if (fieldlen > len) + len = fieldlen; + + hde->hpp.len = len; + } + return hde->hpp.len; +} + +static void update_dynamic_len(struct hpp_dynamic_entry *hde, + struct hist_entry *he) +{ + char *str, *pos; + struct format_field *field = hde->field; + size_t namelen; + bool last = false; + + if (hde->raw_trace) + return; + + /* parse pretty print result and update max length */ + if (!he->trace_output) + he->trace_output = get_trace_output(he); + + namelen = strlen(field->name); + str = he->trace_output; + + while (str) { + pos = strchr(str, ' '); + if (pos == NULL) { + last = true; + pos = str + strlen(str); + } + + if (!strncmp(str, field->name, namelen)) { + size_t len; + + str += namelen + 1; + len = pos - str; + + if (len > hde->dynamic_len) + hde->dynamic_len = len; + break; + } + + if (last) + str = NULL; + else + str = pos + 1; + } +} + +static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct perf_evsel *evsel __maybe_unused) +{ + struct hpp_dynamic_entry *hde; + size_t len = fmt->user_len; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + + if (!len) + len = hde_width(hde); + + return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name); +} + +static int __sort__hde_width(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp __maybe_unused, + struct perf_evsel *evsel __maybe_unused) +{ + struct hpp_dynamic_entry *hde; + size_t len = fmt->user_len; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + + if (!len) + len = hde_width(hde); + + return len; +} + +bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists) +{ + struct hpp_dynamic_entry *hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + + return hists_to_evsel(hists) == hde->evsel; +} + +static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct hpp_dynamic_entry *hde; + size_t len = fmt->user_len; + char *str, *pos; + struct format_field *field; + size_t namelen; + bool last = false; + int ret; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + + if (!len) + len = hde_width(hde); + + if (hde->raw_trace) + goto raw_field; + + if (!he->trace_output) + he->trace_output = get_trace_output(he); + + field = hde->field; + namelen = strlen(field->name); + str = he->trace_output; + + while (str) { + pos = strchr(str, ' '); + if (pos == NULL) { + last = true; + pos = str + strlen(str); + } + + if (!strncmp(str, field->name, namelen)) { + str += namelen + 1; + str = strndup(str, pos - str); + + if (str == NULL) + return scnprintf(hpp->buf, hpp->size, + "%*.*s", len, len, "ERROR"); + break; + } + + if (last) + str = NULL; + else + str = pos + 1; + } + + if (str == NULL) { + struct trace_seq seq; +raw_field: + trace_seq_init(&seq); + pevent_print_field(&seq, he->raw_data, hde->field); + str = seq.buffer; + } + + ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str); + free(str); + return ret; +} + +static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct hpp_dynamic_entry *hde; + struct format_field *field; + unsigned offset, size; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + + if (b == NULL) { + update_dynamic_len(hde, a); + return 0; + } + + field = hde->field; + if (field->flags & FIELD_IS_DYNAMIC) { + unsigned long long dyn; + + pevent_read_number_field(field, a->raw_data, &dyn); + offset = dyn & 0xffff; + size = (dyn >> 16) & 0xffff; + + /* record max width for output */ + if (size > hde->dynamic_len) + hde->dynamic_len = size; + } else { + offset = field->offset; + size = field->size; + } + + return memcmp(a->raw_data + offset, b->raw_data + offset, size); +} + +bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt) +{ + return fmt->cmp == __sort__hde_cmp; +} + +static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct hpp_dynamic_entry *hde_a; + struct hpp_dynamic_entry *hde_b; + + if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b)) + return false; + + hde_a = container_of(a, struct hpp_dynamic_entry, hpp); + hde_b = container_of(b, struct hpp_dynamic_entry, hpp); + + return hde_a->field == hde_b->field; +} + +static void hde_free(struct perf_hpp_fmt *fmt) +{ + struct hpp_dynamic_entry *hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + free(hde); +} + +static struct hpp_dynamic_entry * +__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field, + int level) +{ + struct hpp_dynamic_entry *hde; + + hde = malloc(sizeof(*hde)); + if (hde == NULL) { + pr_debug("Memory allocation failed\n"); + return NULL; + } + + hde->evsel = evsel; + hde->field = field; + hde->dynamic_len = 0; + + hde->hpp.name = field->name; + hde->hpp.header = __sort__hde_header; + hde->hpp.width = __sort__hde_width; + hde->hpp.entry = __sort__hde_entry; + hde->hpp.color = NULL; + + hde->hpp.cmp = __sort__hde_cmp; + hde->hpp.collapse = __sort__hde_cmp; + hde->hpp.sort = __sort__hde_cmp; + hde->hpp.equal = __sort__hde_equal; + hde->hpp.free = hde_free; + + INIT_LIST_HEAD(&hde->hpp.list); + INIT_LIST_HEAD(&hde->hpp.sort_list); + hde->hpp.elide = false; + hde->hpp.len = 0; + hde->hpp.user_len = 0; + hde->hpp.level = level; + + return hde; +} + +struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt) +{ + struct perf_hpp_fmt *new_fmt = NULL; + + if (perf_hpp__is_sort_entry(fmt)) { + struct hpp_sort_entry *hse, *new_hse; + + hse = container_of(fmt, struct hpp_sort_entry, hpp); + new_hse = memdup(hse, sizeof(*hse)); + if (new_hse) + new_fmt = &new_hse->hpp; + } else if (perf_hpp__is_dynamic_entry(fmt)) { + struct hpp_dynamic_entry *hde, *new_hde; + + hde = container_of(fmt, struct hpp_dynamic_entry, hpp); + new_hde = memdup(hde, sizeof(*hde)); + if (new_hde) + new_fmt = &new_hde->hpp; + } else { + new_fmt = memdup(fmt, sizeof(*fmt)); + } + + INIT_LIST_HEAD(&new_fmt->list); + INIT_LIST_HEAD(&new_fmt->sort_list); + + return new_fmt; +} + +static int parse_field_name(char *str, char **event, char **field, char **opt) +{ + char *event_name, *field_name, *opt_name; + + event_name = str; + field_name = strchr(str, '.'); + + if (field_name) { + *field_name++ = '\0'; + } else { + event_name = NULL; + field_name = str; + } + + opt_name = strchr(field_name, '/'); + if (opt_name) + *opt_name++ = '\0'; + + *event = event_name; + *field = field_name; + *opt = opt_name; + + return 0; +} + +/* find match evsel using a given event name. The event name can be: + * 1. '%' + event index (e.g. '%1' for first event) + * 2. full event name (e.g. sched:sched_switch) + * 3. partial event name (should not contain ':') + */ +static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_name) +{ + struct perf_evsel *evsel = NULL; + struct perf_evsel *pos; + bool full_name; + + /* case 1 */ + if (event_name[0] == '%') { + int nr = strtol(event_name+1, NULL, 0); + + if (nr > evlist->nr_entries) + return NULL; + + evsel = perf_evlist__first(evlist); + while (--nr > 0) + evsel = perf_evsel__next(evsel); + + return evsel; + } + + full_name = !!strchr(event_name, ':'); + evlist__for_each(evlist, pos) { + /* case 2 */ + if (full_name && !strcmp(pos->name, event_name)) + return pos; + /* case 3 */ + if (!full_name && strstr(pos->name, event_name)) { + if (evsel) { + pr_debug("'%s' event is ambiguous: it can be %s or %s\n", + event_name, evsel->name, pos->name); + return NULL; + } + evsel = pos; + } + } + + return evsel; +} + +static int __dynamic_dimension__add(struct perf_evsel *evsel, + struct format_field *field, + bool raw_trace, int level) +{ + struct hpp_dynamic_entry *hde; + + hde = __alloc_dynamic_entry(evsel, field, level); + if (hde == NULL) + return -ENOMEM; + + hde->raw_trace = raw_trace; + + perf_hpp__register_sort_field(&hde->hpp); + return 0; +} + +static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level) +{ + int ret; + struct format_field *field; + + field = evsel->tp_format->format.fields; + while (field) { + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); + if (ret < 0) + return ret; + + field = field->next; + } + return 0; +} + +static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace, + int level) +{ + int ret; + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + continue; + + ret = add_evsel_fields(evsel, raw_trace, level); + if (ret < 0) + return ret; + } + return 0; +} + +static int add_all_matching_fields(struct perf_evlist *evlist, + char *field_name, bool raw_trace, int level) +{ + int ret = -ESRCH; + struct perf_evsel *evsel; + struct format_field *field; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + continue; + + field = pevent_find_any_field(evsel->tp_format, field_name); + if (field == NULL) + continue; + + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); + if (ret < 0) + break; + } + return ret; +} + +static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok, + int level) +{ + char *str, *event_name, *field_name, *opt_name; + struct perf_evsel *evsel; + struct format_field *field; + bool raw_trace = symbol_conf.raw_trace; + int ret = 0; + + if (evlist == NULL) + return -ENOENT; + + str = strdup(tok); + if (str == NULL) + return -ENOMEM; + + if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) { + ret = -EINVAL; + goto out; + } + + if (opt_name) { + if (strcmp(opt_name, "raw")) { + pr_debug("unsupported field option %s\n", opt_name); + ret = -EINVAL; + goto out; + } + raw_trace = true; + } + + if (!strcmp(field_name, "trace_fields")) { + ret = add_all_dynamic_fields(evlist, raw_trace, level); + goto out; + } + + if (event_name == NULL) { + ret = add_all_matching_fields(evlist, field_name, raw_trace, level); + goto out; + } + + evsel = find_evsel(evlist, event_name); + if (evsel == NULL) { + pr_debug("Cannot find event: %s\n", event_name); + ret = -ENOENT; + goto out; + } + + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { + pr_debug("%s is not a tracepoint event\n", event_name); + ret = -EINVAL; + goto out; + } + + if (!strcmp(field_name, "*")) { + ret = add_evsel_fields(evsel, raw_trace, level); + } else { + field = pevent_find_any_field(evsel->tp_format, field_name); + if (field == NULL) { + pr_debug("Cannot find event field for %s.%s\n", + event_name, field_name); + return -ENOENT; + } + + ret = __dynamic_dimension__add(evsel, field, raw_trace, level); + } + +out: + free(str); + return ret; +} + +static int __sort_dimension__add(struct sort_dimension *sd, + struct perf_hpp_list *list, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_sort(sd) < 0) + if (__sort_dimension__add_hpp_sort(sd, list, level) < 0) return -1; if (sd->entry->se_collapse) @@ -1545,45 +2170,63 @@ static int __sort_dimension__add(struct sort_dimension *sd) return 0; } -static int __hpp_dimension__add(struct hpp_dimension *hd) +static int __hpp_dimension__add(struct hpp_dimension *hd, + struct perf_hpp_list *list, + int level) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__register_sort_field(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd, level); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp_list__register_sort_field(list, fmt); return 0; } -static int __sort_dimension__add_output(struct sort_dimension *sd) +static int __sort_dimension__add_output(struct perf_hpp_list *list, + struct sort_dimension *sd) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd) < 0) + if (__sort_dimension__add_hpp_output(sd, list) < 0) return -1; sd->taken = 1; return 0; } -static int __hpp_dimension__add_output(struct hpp_dimension *hd) +static int __hpp_dimension__add_output(struct perf_hpp_list *list, + struct hpp_dimension *hd) { - if (!hd->taken) { - hd->taken = 1; + struct perf_hpp_fmt *fmt; - perf_hpp__column_register(hd->fmt); - } + if (hd->taken) + return 0; + + fmt = __hpp_dimension__alloc_hpp(hd, 0); + if (!fmt) + return -1; + + hd->taken = 1; + perf_hpp_list__column_register(list, fmt); return 0; } int hpp_dimension__add_output(unsigned col) { BUG_ON(col >= PERF_HPP__MAX_INDEX); - return __hpp_dimension__add_output(&hpp_sort_dimensions[col]); + return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]); } -int sort_dimension__add(const char *tok) +static int sort_dimension__add(struct perf_hpp_list *list, const char *tok, + struct perf_evlist *evlist __maybe_unused, + int level) { unsigned int i; @@ -1618,9 +2261,13 @@ int sort_dimension__add(const char *tok) sort__has_dso = 1; } else if (sd->entry == &sort_socket) { sort__has_socket = 1; + } else if (sd->entry == &sort_thread) { + sort__has_thread = 1; + } else if (sd->entry == &sort_comm) { + sort__has_comm = 1; } - return __sort_dimension__add(sd); + return __sort_dimension__add(sd, list, level); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -1629,7 +2276,7 @@ int sort_dimension__add(const char *tok) if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add(hd); + return __hpp_dimension__add(hd, list, level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -1644,7 +2291,7 @@ int sort_dimension__add(const char *tok) if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) sort__has_sym = 1; - __sort_dimension__add(sd); + __sort_dimension__add(sd, list, level); return 0; } @@ -1660,14 +2307,61 @@ int sort_dimension__add(const char *tok) if (sd->entry == &sort_mem_daddr_sym) sort__has_sym = 1; - __sort_dimension__add(sd); + __sort_dimension__add(sd, list, level); return 0; } + if (!add_dynamic_entry(evlist, tok, level)) + return 0; + return -ESRCH; } -static const char *get_default_sort_order(void) +static int setup_sort_list(struct perf_hpp_list *list, char *str, + struct perf_evlist *evlist) +{ + char *tmp, *tok; + int ret = 0; + int level = 0; + int next_level = 1; + bool in_group = false; + + do { + tok = str; + tmp = strpbrk(str, "{}, "); + if (tmp) { + if (in_group) + next_level = level; + else + next_level = level + 1; + + if (*tmp == '{') + in_group = true; + else if (*tmp == '}') + in_group = false; + + *tmp = '\0'; + str = tmp + 1; + } + + if (*tok) { + ret = sort_dimension__add(list, tok, evlist, level); + if (ret == -EINVAL) { + error("Invalid --sort key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --sort key: `%s'", tok); + break; + } + } + + level = next_level; + } while (tmp); + + return ret; +} + +static const char *get_default_sort_order(struct perf_evlist *evlist) { const char *default_sort_orders[] = { default_sort_order, @@ -1675,14 +2369,33 @@ static const char *get_default_sort_order(void) default_mem_sort_order, default_top_sort_order, default_diff_sort_order, + default_tracepoint_sort_order, }; + bool use_trace = true; + struct perf_evsel *evsel; BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders)); + if (evlist == NULL) + goto out_no_evlist; + + evlist__for_each(evlist, evsel) { + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { + use_trace = false; + break; + } + } + + if (use_trace) { + sort__mode = SORT_MODE__TRACEPOINT; + if (symbol_conf.raw_trace) + return "trace_fields"; + } +out_no_evlist: return default_sort_orders[sort__mode]; } -static int setup_sort_order(void) +static int setup_sort_order(struct perf_evlist *evlist) { char *new_sort_order; @@ -1703,7 +2416,7 @@ static int setup_sort_order(void) * because it's checked over the rest of the code. */ if (asprintf(&new_sort_order, "%s,%s", - get_default_sort_order(), sort_order + 1) < 0) { + get_default_sort_order(evlist), sort_order + 1) < 0) { error("Not enough memory to set up --sort"); return -ENOMEM; } @@ -1712,13 +2425,41 @@ static int setup_sort_order(void) return 0; } -static int __setup_sorting(void) +/* + * Adds 'pre,' prefix into 'str' is 'pre' is + * not already part of 'str'. + */ +static char *prefix_if_not_in(const char *pre, char *str) +{ + char *n; + + if (!str || strstr(str, pre)) + return str; + + if (asprintf(&n, "%s,%s", pre, str) < 0) + return NULL; + + free(str); + return n; +} + +static char *setup_overhead(char *keys) +{ + keys = prefix_if_not_in("overhead", keys); + + if (symbol_conf.cumulate_callchain) + keys = prefix_if_not_in("overhead_children", keys); + + return keys; +} + +static int __setup_sorting(struct perf_evlist *evlist) { - char *tmp, *tok, *str; + char *str; const char *sort_keys; int ret = 0; - ret = setup_sort_order(); + ret = setup_sort_order(evlist); if (ret) return ret; @@ -1732,7 +2473,7 @@ static int __setup_sorting(void) return 0; } - sort_keys = get_default_sort_order(); + sort_keys = get_default_sort_order(evlist); } str = strdup(sort_keys); @@ -1741,18 +2482,19 @@ static int __setup_sorting(void) return -ENOMEM; } - for (tok = strtok_r(str, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = sort_dimension__add(tok); - if (ret == -EINVAL) { - error("Invalid --sort key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --sort key: `%s'", tok); - break; + /* + * Prepend overhead fields for backward compatibility. + */ + if (!is_strict_order(field_order)) { + str = setup_overhead(str); + if (str == NULL) { + error("Not enough memory to setup overhead keys"); + return -ENOMEM; } } + ret = setup_sort_list(&perf_hpp_list, str, evlist); + free(str); return ret; } @@ -1762,7 +2504,7 @@ void perf_hpp__set_elide(int idx, bool elide) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -1822,7 +2564,7 @@ void sort__setup_elide(FILE *output) struct perf_hpp_fmt *fmt; struct hpp_sort_entry *hse; - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -1834,7 +2576,7 @@ void sort__setup_elide(FILE *output) * It makes no sense to elide all of sort entries. * Just revert them to show up again. */ - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -1842,7 +2584,7 @@ void sort__setup_elide(FILE *output) return; } - perf_hpp__for_each_format(fmt) { + perf_hpp_list__for_each_format(&perf_hpp_list, fmt) { if (!perf_hpp__is_sort_entry(fmt)) continue; @@ -1850,7 +2592,7 @@ void sort__setup_elide(FILE *output) } } -static int output_field_add(char *tok) +static int output_field_add(struct perf_hpp_list *list, char *tok) { unsigned int i; @@ -1860,7 +2602,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { @@ -1869,7 +2611,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, hd->name, strlen(tok))) continue; - return __hpp_dimension__add_output(hd); + return __hpp_dimension__add_output(list, hd); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -1878,7 +2620,7 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -1887,12 +2629,32 @@ static int output_field_add(char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(sd); + return __sort_dimension__add_output(list, sd); } return -ESRCH; } +static int setup_output_list(struct perf_hpp_list *list, char *str) +{ + char *tmp, *tok; + int ret = 0; + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + ret = output_field_add(list, tok); + if (ret == -EINVAL) { + error("Invalid --fields key: `%s'", tok); + break; + } else if (ret == -ESRCH) { + error("Unknown --fields key: `%s'", tok); + break; + } + } + + return ret; +} + static void reset_dimensions(void) { unsigned int i; @@ -1917,7 +2679,7 @@ bool is_strict_order(const char *order) static int __setup_output_field(void) { - char *tmp, *tok, *str, *strp; + char *str, *strp; int ret = -EINVAL; if (field_order == NULL) @@ -1937,33 +2699,23 @@ static int __setup_output_field(void) goto out; } - for (tok = strtok_r(strp, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(tok); - if (ret == -EINVAL) { - error("Invalid --fields key: `%s'", tok); - break; - } else if (ret == -ESRCH) { - error("Unknown --fields key: `%s'", tok); - break; - } - } + ret = setup_output_list(&perf_hpp_list, strp); out: free(str); return ret; } -int setup_sorting(void) +int setup_sorting(struct perf_evlist *evlist) { int err; - err = __setup_sorting(); + err = __setup_sorting(evlist); if (err < 0) return err; if (parent_pattern != default_parent_pattern) { - err = sort_dimension__add("parent"); + err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1); if (err < 0) return err; } @@ -1981,9 +2733,13 @@ int setup_sorting(void) return err; /* copy sort keys to output fields */ - perf_hpp__setup_output_field(); + perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ - perf_hpp__append_sort_keys(); + perf_hpp__append_sort_keys(&perf_hpp_list); + + /* setup hists-specific output fields */ + if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0) + return -1; return 0; } @@ -1999,5 +2755,5 @@ void reset_output_field(void) sort_order = NULL; reset_dimensions(); - perf_hpp__reset_output_field(); + perf_hpp__reset_output_field(&perf_hpp_list); } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 31228851e397..3f4e35998119 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -18,7 +18,7 @@ #include "debug.h" #include "header.h" -#include "parse-options.h" +#include <subcmd/parse-options.h> #include "parse-events.h" #include "hist.h" #include "thread.h" @@ -32,9 +32,12 @@ extern const char default_sort_order[]; extern regex_t ignore_callees_regex; extern int have_ignore_callees; extern int sort__need_collapse; +extern int sort__has_dso; extern int sort__has_parent; extern int sort__has_sym; extern int sort__has_socket; +extern int sort__has_thread; +extern int sort__has_comm; extern enum sort_mode sort__mode; extern struct sort_entry sort_comm; extern struct sort_entry sort_dso; @@ -94,9 +97,11 @@ struct hist_entry { s32 socket; s32 cpu; u8 cpumode; + u8 depth; /* We are added by hists__add_dummy_entry. */ bool dummy; + bool leaf; char level; u8 filtered; @@ -113,15 +118,28 @@ struct hist_entry { bool init_have_children; bool unfolded; bool has_children; + bool has_no_entry; }; }; char *srcline; char *srcfile; struct symbol *parent; - struct rb_root sorted_chain; struct branch_info *branch_info; struct hists *hists; struct mem_info *mem_info; + void *raw_data; + u32 raw_size; + void *trace_output; + struct perf_hpp_list *hpp_list; + struct hist_entry *parent_he; + union { + /* this is for hierarchical entry structure */ + struct { + struct rb_root hroot_in; + struct rb_root hroot_out; + }; /* non-leaf entries */ + struct rb_root sorted_chain; /* leaf entry has callchains */ + }; struct callchain_root callchain[0]; /* must be last member */ }; @@ -157,6 +175,17 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he) return period * 100.0 / total_period; } +static inline u64 cl_address(u64 address) +{ + /* return the cacheline of the address */ + return (address & ~(cacheline_size - 1)); +} + +static inline u64 cl_offset(u64 address) +{ + /* return the cacheline of the address */ + return (address & (cacheline_size - 1)); +} enum sort_mode { SORT_MODE__NORMAL, @@ -164,6 +193,7 @@ enum sort_mode { SORT_MODE__MEMORY, SORT_MODE__TOP, SORT_MODE__DIFF, + SORT_MODE__TRACEPOINT, }; enum sort_type { @@ -180,6 +210,7 @@ enum sort_type { SORT_LOCAL_WEIGHT, SORT_GLOBAL_WEIGHT, SORT_TRANSACTION, + SORT_TRACE, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -209,8 +240,6 @@ enum sort_type { */ struct sort_entry { - struct list_head list; - const char *se_header; int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *); @@ -218,16 +247,18 @@ struct sort_entry { int64_t (*se_sort)(struct hist_entry *, struct hist_entry *); int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, unsigned int width); + int (*se_filter)(struct hist_entry *he, int type, const void *arg); u8 se_width_idx; }; extern struct sort_entry sort_thread; extern struct list_head hist_entry__sort_list; -int setup_sorting(void); +struct perf_evlist; +struct pevent; +int setup_sorting(struct perf_evlist *evlist); int setup_output_field(void); void reset_output_field(void); -extern int sort_dimension__add(const char *); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 2a5d8d7698ae..b33ffb2af2cf 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -2,6 +2,7 @@ #include "evsel.h" #include "stat.h" #include "color.h" +#include "pmu.h" enum { CTX_BIT_USER = 1 << 0, @@ -14,6 +15,13 @@ enum { #define NUM_CTX CTX_BIT_MAX +/* + * AGGR_GLOBAL: Use CPU 0 + * AGGR_SOCKET: Use first CPU of socket + * AGGR_CORE: Use first CPU of core + * AGGR_NONE: Use matching CPU + * AGGR_THREAD: Not supported? + */ static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; @@ -28,9 +36,15 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; +static bool have_frontend_stalled; struct stats walltime_nsecs_stats; +void perf_stat__init_shadow_stats(void) +{ + have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); +} + static int evsel_context(struct perf_evsel *evsel) { int ctx = 0; @@ -137,9 +151,10 @@ static const char *get_ratio_color(enum grc_type type, double ratio) return color; } -static void print_stalled_cycles_frontend(FILE *out, int cpu, +static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -152,14 +167,17 @@ static void print_stalled_cycles_frontend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " frontend cycles idle "); + if (ratio) + out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + ratio); + else + out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(FILE *out, int cpu, +static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel - __maybe_unused, double avg) + __maybe_unused, double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -172,14 +190,13 @@ static void print_stalled_cycles_backend(FILE *out, int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " backend cycles idle "); + out->print_metric(out->ctx, color, "%6.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(FILE *out, int cpu, +static void print_branch_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -192,14 +209,13 @@ static void print_branch_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all branches "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(FILE *out, int cpu, +static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -212,14 +228,13 @@ static void print_l1_dcache_misses(FILE *out, int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-dcache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(FILE *out, int cpu, +static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -231,15 +246,13 @@ static void print_l1_icache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all L1-icache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(FILE *out, int cpu, +static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -251,15 +264,13 @@ static void print_dtlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all dTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(FILE *out, int cpu, +static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -271,15 +282,13 @@ static void print_itlb_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all iTLB cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(FILE *out, int cpu, +static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __maybe_unused, - double avg) + double avg, + struct perf_stat_output_ctx *out) { double total, ratio = 0.0; const char *color; @@ -291,15 +300,15 @@ static void print_ll_cache_misses(FILE *out, int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(out, " # "); - color_fprintf(out, color, "%6.2f%%", ratio); - fprintf(out, " of all LL-cache hits "); + out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr) +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out) { + void *ctxp = out->ctx; + print_metric_t print_metric = out->print_metric; double total, ratio = 0.0, total2; int ctx = evsel_context(evsel); @@ -307,114 +316,145 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %5.2f insns per cycle ", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "insn per cycle", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); if (total && avg) { + out->new_line(ctxp); ratio = total / avg; - fprintf(out, "\n"); - if (aggr == AGGR_NONE) - fprintf(out, " "); - fprintf(out, " # %5.2f stalled cycles per insn", ratio); + print_metric(ctxp, NULL, "%7.2f ", + "stalled cycles per insn", + ratio); + } else if (have_frontend_stalled) { + print_metric(ctxp, NULL, NULL, + "stalled cycles per insn", 0); } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[ctx][cpu].n != 0) { - print_branch_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { + if (runtime_branches_stats[ctx][cpu].n != 0) + print_branch_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[ctx][cpu].n != 0) { - print_l1_dcache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_dcache_stats[ctx][cpu].n != 0) + print_l1_dcache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[ctx][cpu].n != 0) { - print_l1_icache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_l1_icache_stats[ctx][cpu].n != 0) + print_l1_icache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[ctx][cpu].n != 0) { - print_dtlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[ctx][cpu].n != 0) { - print_itlb_cache_misses(out, cpu, evsel, avg); + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_itlb_cache_stats[ctx][cpu].n != 0) + print_itlb_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[ctx][cpu].n != 0) { - print_ll_cache_misses(out, cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[ctx][cpu].n != 0) { + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { + if (runtime_ll_cache_stats[ctx][cpu].n != 0) + print_ll_cache_misses(cpu, evsel, avg, out); + else + print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); if (total) ratio = avg * 100 / total; - fprintf(out, " # %8.3f %% of all cache refs ", ratio); - + if (runtime_cacherefs_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.3f %%", + "of all cache refs", ratio); + else + print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(out, cpu, evsel, avg); + print_stalled_cycles_frontend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(out, cpu, evsel, avg); + print_stalled_cycles_backend(cpu, evsel, avg, out); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = avg_stats(&runtime_nsecs_stats[cpu]); if (total) { ratio = avg / total; - fprintf(out, " # %8.3f GHz ", ratio); + print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); if (total) - fprintf(out, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); + print_metric(ctxp, NULL, + "%7.2f%%", "transactional cycles", + 100.0 * (avg / total)); + else + print_metric(ctxp, NULL, NULL, "transactional cycles", + 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = avg_stats(&runtime_cycles_stats[ctx][cpu]); total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (total2 < avg) total2 = avg; if (total) - fprintf(out, - " # %5.2f%% aborted cycles ", + print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); - } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + else + print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / transaction ", ratio); - } else if (perf_stat_evsel__is(evsel, ELISION_START) && - runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + print_metric(ctxp, NULL, "%8.0f", + "cycles / transaction", ratio); + else + print_metric(ctxp, NULL, NULL, "cycles / transaction", + 0); + } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); if (avg) ratio = total / avg; - fprintf(out, " # %8.0f cycles / elision ", ratio); + print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); + } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) { + if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) + print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", + avg / ratio); + else + print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (runtime_nsecs_stats[cpu].n != 0) { char unit = 'M'; + char unit_buf[10]; total = avg_stats(&runtime_nsecs_stats[cpu]); @@ -424,9 +464,9 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, ratio *= 1000; unit = 'K'; } - - fprintf(out, " # %8.3f %c/sec ", ratio, unit); + snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); + print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else { - fprintf(out, " "); + print_metric(ctxp, NULL, NULL, NULL, 0); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 2d9d8306dbd3..4d9b481cf3b6 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -97,7 +97,7 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) } } -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) { int i; struct perf_stat_evsel *ps = evsel->priv; @@ -108,7 +108,7 @@ void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) perf_stat_evsel_id_init(evsel); } -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) { evsel->priv = zalloc(sizeof(struct perf_stat_evsel)); if (evsel->priv == NULL) @@ -117,13 +117,13 @@ int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) return 0; } -void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) { zfree(&evsel->priv); } -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads) +static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) { struct perf_counts *counts; @@ -134,13 +134,13 @@ int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, return counts ? 0 : -ENOMEM; } -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) { perf_counts__delete(evsel->prev_raw_counts); evsel->prev_raw_counts = NULL; } -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) { int ncpus = perf_evsel__nr_cpus(evsel); int nthreads = thread_map__nr(evsel->threads); @@ -310,7 +310,16 @@ int perf_stat_process_counter(struct perf_stat_config *config, int i, ret; aggr->val = aggr->ena = aggr->run = 0; - init_stats(ps->res_stats); + + /* + * We calculate counter's data every interval, + * and the display code shows ps->res_stats + * avg value. We need to zero the stats for + * interval mode, otherwise overall avg running + * averages will be shown for each interval. + */ + if (config->interval) + init_stats(ps->res_stats); if (counter->per_pkg) zero_per_pkg(counter); @@ -341,3 +350,65 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; } + +int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session) +{ + struct perf_counts_values count; + struct stat_event *st = &event->stat; + struct perf_evsel *counter; + + count.val = st->val; + count.ena = st->ena; + count.run = st->run; + + counter = perf_evlist__id2evsel(session->evlist, st->id); + if (!counter) { + pr_err("Failed to resolve counter for stat event.\n"); + return -EINVAL; + } + + *perf_counts(counter->counts, st->cpu, st->thread) = count; + counter->supported = true; + return 0; +} + +size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp) +{ + struct stat_event *st = (struct stat_event *) event; + size_t ret; + + ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n", + st->id, st->cpu, st->thread); + ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n", + st->val, st->ena, st->run); + + return ret; +} + +size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp) +{ + struct stat_round_event *rd = (struct stat_round_event *)event; + size_t ret; + + ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time, + rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL"); + + return ret; +} + +size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) +{ + struct perf_stat_config sc; + size_t ret; + + perf_event__read_stat_config(&sc, &event->stat_config); + + ret = fprintf(fp, "\n"); + ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode); + ret += fprintf(fp, "... scale %d\n", sc.scale); + ret += fprintf(fp, "... interval %u\n", sc.interval); + + return ret; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index da1d11c4f8c1..0150e786ccc7 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -68,21 +68,23 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel); extern struct stats walltime_nsecs_stats; +typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, + const char *fmt, double val); +typedef void (*new_line_t )(void *ctx); + +void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, int cpu); -void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, - double avg, int cpu, enum aggr_mode aggr); - -void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); -int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); -void perf_evsel__free_stat_priv(struct perf_evsel *evsel); - -int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, - int ncpus, int nthreads); -void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); +struct perf_stat_output_ctx { + void *ctx; + print_metric_t print_metric; + new_line_t new_line; +}; -int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); +void perf_stat__print_shadow_stats(struct perf_evsel *evsel, + double avg, int cpu, + struct perf_stat_output_ctx *out); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); void perf_evlist__free_stats(struct perf_evlist *evlist); @@ -90,4 +92,14 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist); int perf_stat_process_counter(struct perf_stat_config *config, struct perf_evsel *counter); +struct perf_tool; +union perf_event; +struct perf_session; +int perf_event__process_stat_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); + +size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); #endif diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 25671fa16618..d3d279275432 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -51,30 +51,6 @@ void strbuf_grow(struct strbuf *sb, size_t extra) ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); } -static void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, - const void *data, size_t dlen) -{ - if (pos + len < pos) - die("you want to use way too much memory"); - if (pos > sb->len) - die("`pos' is too far after the end of the buffer"); - if (pos + len > sb->len) - die("`pos + len' is too far after the end of the buffer"); - - if (dlen >= len) - strbuf_grow(sb, dlen - len); - memmove(sb->buf + pos + dlen, - sb->buf + pos + len, - sb->len - pos - len); - memcpy(sb->buf + pos, data, dlen); - strbuf_setlen(sb, sb->len + dlen - len); -} - -void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_splice(sb, pos, len, NULL, 0); -} - void strbuf_add(struct strbuf *sb, const void *data, size_t len) { strbuf_grow(sb, len); diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h index 529f2f035249..7a32c838884d 100644 --- a/tools/perf/util/strbuf.h +++ b/tools/perf/util/strbuf.h @@ -77,8 +77,6 @@ static inline void strbuf_addch(struct strbuf *sb, int c) { sb->buf[sb->len] = '\0'; } -extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); - extern void strbuf_add(struct strbuf *, const void *, size_t); static inline void strbuf_addstr(struct strbuf *sb, const char *s) { strbuf_add(sb, s, strlen(s)); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index fc8781de62db..7f7e072be746 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -342,22 +342,6 @@ char *rtrim(char *s) return s; } -/** - * memdup - duplicate region of memory - * @src: memory region to duplicate - * @len: memory region length - */ -void *memdup(const void *src, size_t len) -{ - void *p; - - p = malloc(len); - if (p) - memcpy(p, src, len); - - return p; -} - char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) { /* diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index bdf98f6f27bb..0d3dfcb919b4 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c @@ -126,6 +126,11 @@ static int strlist__parse_list_entry(struct strlist *slist, const char *s, err = strlist__load(slist, subst); goto out; } + + if (slist->file_only) { + err = -ENOENT; + goto out; + } } err = strlist__add(slist, s); @@ -157,11 +162,13 @@ struct strlist *strlist__new(const char *list, const struct strlist_config *conf if (slist != NULL) { bool dupstr = true; + bool file_only = false; const char *dirname = NULL; if (config) { dupstr = !config->dont_dupstr; dirname = config->dirname; + file_only = config->file_only; } rblist__init(&slist->rblist); @@ -170,6 +177,7 @@ struct strlist *strlist__new(const char *list, const struct strlist_config *conf slist->rblist.node_delete = strlist__node_delete; slist->dupstr = dupstr; + slist->file_only = file_only; if (list && strlist__parse_list(slist, list, dirname) != 0) goto out_error; diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 297565aa7535..ca990029e243 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h @@ -13,11 +13,18 @@ struct str_node { struct strlist { struct rblist rblist; - bool dupstr; + bool dupstr; + bool file_only; }; +/* + * @file_only: When dirname is present, only consider entries as filenames, + * that should not be added to the list if dirname/entry is not + * found + */ struct strlist_config { bool dont_dupstr; + bool file_only; const char *dirname; }; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 475d88d0a1c9..b1dd68f358fc 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -6,6 +6,7 @@ #include <inttypes.h> #include "symbol.h" +#include "demangle-java.h" #include "machine.h" #include "vdso.h" #include <symbol/kallsyms.h> @@ -1026,8 +1027,8 @@ int dso__load_sym(struct dso *dso, struct map *map, curr_dso->long_name_len = dso->long_name_len; curr_map = map__new2(start, curr_dso, map->type); + dso__put(curr_dso); if (curr_map == NULL) { - dso__put(curr_dso); goto out_elf_end; } if (adjust_kernel_syms) { @@ -1042,7 +1043,14 @@ int dso__load_sym(struct dso *dso, struct map *map, } curr_dso->symtab_type = dso->symtab_type; map_groups__insert(kmaps, curr_map); + /* + * Add it before we drop the referece to curr_map, + * i.e. while we still are sure to have a reference + * to this DSO via curr_map->dso. + */ dsos__add(&map->groups->machine->dsos, curr_dso); + /* kmaps already got it */ + map__put(curr_map); dso__set_loaded(curr_dso, map->type); } else curr_dso = curr_map->dso; @@ -1070,6 +1078,8 @@ new_symbol: demangle_flags = DMGL_PARAMS | DMGL_ANSI; demangled = bfd_demangle(NULL, elf_name, demangle_flags); + if (demangled == NULL) + demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); if (demangled != NULL) elf_name = demangled; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b4cc7662677e..e7588dc91518 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -39,6 +39,7 @@ struct symbol_conf symbol_conf = { .cumulate_callchain = true, .show_hist_headers = true, .symfs = "", + .event_group = true, }; static enum dso_binary_type binary_type_symtab[] = { @@ -654,19 +655,24 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; - int count = 0, moved = 0; + int count = 0; + struct rb_root old_root = dso->symbols[map->type]; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); if (!kmaps) return -1; + *root = RB_ROOT; + while (next) { char *module; pos = rb_entry(next, struct symbol, rb_node); next = rb_next(&pos->rb_node); + rb_erase_init(&pos->rb_node, &old_root); + module = strchr(pos->name, '\t'); if (module) *module = '\0'; @@ -674,28 +680,21 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, curr_map = map_groups__find(kmaps, map->type, pos->start); if (!curr_map || (filter && filter(curr_map, pos))) { - rb_erase_init(&pos->rb_node, root); symbol__delete(pos); - } else { - pos->start -= curr_map->start - curr_map->pgoff; - if (pos->end) - pos->end -= curr_map->start - curr_map->pgoff; - if (curr_map->dso != map->dso) { - rb_erase_init(&pos->rb_node, root); - symbols__insert( - &curr_map->dso->symbols[curr_map->type], - pos); - ++moved; - } else { - ++count; - } + continue; } + + pos->start -= curr_map->start - curr_map->pgoff; + if (pos->end) + pos->end -= curr_map->start - curr_map->pgoff; + symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + ++count; } /* Symbols have been adjusted */ dso->adjust_symbols = 1; - return count + moved; + return count; } /* @@ -1438,9 +1437,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (lstat(dso->name, &st) < 0) goto out; - if (st.st_uid && (st.st_uid != geteuid())) { + if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) { pr_warning("File %s not owned by current user or root, " - "ignoring it.\n", dso->name); + "ignoring it (use -f to override).\n", dso->name); goto out; } @@ -1467,7 +1466,8 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work */ - if (filename__read_build_id(dso->name, build_id, BUILD_ID_SIZE) > 0) + if (is_regular_file(name) && + filename__read_build_id(dso->long_name, build_id, BUILD_ID_SIZE) > 0) dso__set_build_id(dso, build_id); /* @@ -1488,6 +1488,9 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) root_dir, name, PATH_MAX)) continue; + if (!is_regular_file(name)) + continue; + /* Name is now the name of the next image to try */ if (symsrc__init(ss, dso, name, symtab_type) < 0) continue; @@ -1526,6 +1529,10 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (!runtime_ss && syms_ss) runtime_ss = syms_ss; + if (syms_ss && syms_ss->type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + if (dso__build_id_is_kmod(dso, name, PATH_MAX)) + kmod = true; + if (syms_ss) ret = dso__load_sym(dso, map, syms_ss, runtime_ss, filter, kmod); else @@ -1862,24 +1869,44 @@ static void vmlinux_path__exit(void) zfree(&vmlinux_path); } +static const char * const vmlinux_paths[] = { + "vmlinux", + "/boot/vmlinux" +}; + +static const char * const vmlinux_paths_upd[] = { + "/boot/vmlinux-%s", + "/usr/lib/debug/boot/vmlinux-%s", + "/lib/modules/%s/build/vmlinux", + "/usr/lib/debug/lib/modules/%s/vmlinux", + "/usr/lib/debug/boot/vmlinux-%s.debug" +}; + +static int vmlinux_path__add(const char *new_entry) +{ + vmlinux_path[vmlinux_path__nr_entries] = strdup(new_entry); + if (vmlinux_path[vmlinux_path__nr_entries] == NULL) + return -1; + ++vmlinux_path__nr_entries; + + return 0; +} + static int vmlinux_path__init(struct perf_env *env) { struct utsname uts; char bf[PATH_MAX]; char *kernel_version; + unsigned int i; - vmlinux_path = malloc(sizeof(char *) * 6); + vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) + + ARRAY_SIZE(vmlinux_paths_upd))); if (vmlinux_path == NULL) return -1; - vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux"); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux"); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; + for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++) + if (vmlinux_path__add(vmlinux_paths[i]) < 0) + goto out_fail; /* only try kernel version if no symfs was given */ if (symbol_conf.symfs[0] != 0) @@ -1894,28 +1921,11 @@ static int vmlinux_path__init(struct perf_env *env) kernel_version = uts.release; } - snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", kernel_version); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - snprintf(bf, sizeof(bf), "/usr/lib/debug/boot/vmlinux-%s", - kernel_version); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", kernel_version); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux", - kernel_version); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; + for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) { + snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], kernel_version); + if (vmlinux_path__add(bf) < 0) + goto out_fail; + } return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 40073c60b83d..a937053a0ae0 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -84,6 +84,7 @@ struct symbol_conf { unsigned short priv_size; unsigned short nr_events; bool try_vmlinux_path, + force, ignore_vmlinux, ignore_vmlinux_buildid, show_kernel_path, @@ -107,7 +108,10 @@ struct symbol_conf { show_hist_headers, branch_callstack, has_filter, - show_ref_callgraph; + show_ref_callgraph, + hide_unresolved, + raw_trace, + report_hierarchy; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c new file mode 100644 index 000000000000..90b47d8aa19c --- /dev/null +++ b/tools/perf/util/term.c @@ -0,0 +1,35 @@ +#include "util.h" + +void get_term_dimensions(struct winsize *ws) +{ + char *s = getenv("LINES"); + + if (s != NULL) { + ws->ws_row = atoi(s); + s = getenv("COLUMNS"); + if (s != NULL) { + ws->ws_col = atoi(s); + if (ws->ws_row && ws->ws_col) + return; + } + } +#ifdef TIOCGWINSZ + if (ioctl(1, TIOCGWINSZ, ws) == 0 && + ws->ws_row && ws->ws_col) + return; +#endif + ws->ws_row = 25; + ws->ws_col = 80; +} + +void set_term_quiet_input(struct termios *old) +{ + struct termios tc; + + tcgetattr(0, old); + tc = *old; + tc.c_lflag &= ~(ICANON | ECHO); + tc.c_cc[VMIN] = 0; + tc.c_cc[VTIME] = 0; + tcsetattr(0, TCSANOW, &tc); +} diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h new file mode 100644 index 000000000000..2c06a61846a1 --- /dev/null +++ b/tools/perf/util/term.h @@ -0,0 +1,10 @@ +#ifndef __PERF_TERM_H +#define __PERF_TERM_H + +struct termios; +struct winsize; + +void get_term_dimensions(struct winsize *ws); +void set_term_quiet_input(struct termios *old); + +#endif /* __PERF_TERM_H */ diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 0a9ae8014729..dfd00c6dad6e 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -19,8 +19,10 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine) thread->mg = map_groups__new(machine); } else { leader = __machine__findnew_thread(machine, pid, pid); - if (leader) + if (leader) { thread->mg = map_groups__get(leader->mg); + thread__put(leader); + } } return thread->mg ? 0 : -1; @@ -53,7 +55,7 @@ struct thread *thread__new(pid_t pid, pid_t tid) goto err_thread; list_add(&comm->list, &thread->comm_list); - atomic_set(&thread->refcnt, 0); + atomic_set(&thread->refcnt, 1); RB_CLEAR_NODE(&thread->rb_node); } @@ -95,6 +97,10 @@ struct thread *thread__get(struct thread *thread) void thread__put(struct thread *thread) { if (thread && atomic_dec_and_test(&thread->refcnt)) { + /* + * Remove it from the dead_threads list, as last reference + * is gone. + */ list_del_init(&thread->node); thread__delete(thread); } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 6ec3c5ca438f..08afc6909953 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -13,6 +13,7 @@ #include "thread_map.h" #include "util.h" #include "debug.h" +#include "event.h" /* Skip "." and ".." directories */ static int filter(const struct dirent *dir) @@ -304,6 +305,7 @@ out: out_free_threads: zfree(&threads); + strlist__delete(slist); goto out; } @@ -408,3 +410,29 @@ void thread_map__read_comms(struct thread_map *threads) for (i = 0; i < threads->nr; ++i) comm_init(threads, i); } + +static void thread_map__copy_event(struct thread_map *threads, + struct thread_map_event *event) +{ + unsigned i; + + threads->nr = (int) event->nr; + + for (i = 0; i < event->nr; i++) { + thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid); + threads->map[i].comm = strndup(event->entries[i].comm, 16); + } + + atomic_set(&threads->refcnt, 1); +} + +struct thread_map *thread_map__new_event(struct thread_map_event *event) +{ + struct thread_map *threads; + + threads = thread_map__alloc(event->nr); + if (threads) + thread_map__copy_event(threads, event); + + return threads; +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index af679d8a50f8..85e4c7c4fbde 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -16,11 +16,14 @@ struct thread_map { struct thread_map_data map[]; }; +struct thread_map_event; + struct thread_map *thread_map__new_dummy(void); struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct thread_map *thread_map__new_event(struct thread_map_event *event); struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index cab8cc24831b..55de4cffcd4e 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -50,12 +50,18 @@ struct perf_tool { throttle, unthrottle; event_attr_op attr; + event_attr_op event_update; event_op2 tracing_data; event_oe finished_round; event_op2 build_id, id_index, auxtrace_info, - auxtrace_error; + auxtrace_error, + thread_map, + cpu_map, + stat_config, + stat, + stat_round; event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index 8ff7d620d942..33b52eaa39db 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -209,7 +209,7 @@ static const struct flag flags[] = { { "NET_TX_SOFTIRQ", 2 }, { "NET_RX_SOFTIRQ", 3 }, { "BLOCK_SOFTIRQ", 4 }, - { "BLOCK_IOPOLL_SOFTIRQ", 5 }, + { "IRQ_POLL_SOFTIRQ", 5 }, { "TASKLET_SOFTIRQ", 6 }, { "SCHED_SOFTIRQ", 7 }, { "HRTIMER_SOFTIRQ", 8 }, diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 802bb868d446..8ae051e0ec79 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -10,6 +10,7 @@ #include <linux/err.h> #include <traceevent/event-parse.h> #include <api/fs/tracing_path.h> +#include <api/fs/fs.h> #include "trace-event.h" #include "machine.h" #include "util.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index b85ee55cca0c..bce5b1dac268 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -65,6 +65,7 @@ int tracing_data_put(struct tracing_data *tdata); struct addr_location; struct perf_session; +struct perf_stat_config; struct scripting_ops { const char *name; @@ -75,6 +76,9 @@ struct scripting_ops { struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al); + void (*process_stat)(struct perf_stat_config *config, + struct perf_evsel *evsel, u64 tstamp); + void (*process_stat_interval)(u64 tstamp); int (*generate_script) (struct pevent *pevent, const char *outfile); }; diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 4d4210d4e13d..1b741646eed0 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -19,7 +19,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) u64 quot, rem; quot = cyc >> tc->time_shift; - rem = cyc & ((1 << tc->time_shift) - 1); + rem = cyc & (((u64)1 << tc->time_shift) - 1); return tc->time_zero + quot * tc->time_mult + ((rem * tc->time_mult) >> tc->time_shift); } diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 2dcfe9a7c8d0..cf5e250bc78e 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -11,6 +11,7 @@ #include <linux/types.h> #include "event.h" #include "perf_regs.h" +#include "callchain.h" static char *debuginfo_path; @@ -52,25 +53,28 @@ static int report_module(u64 ip, struct unwind_info *ui) return __report_module(&al, ip, ui); } +/* + * Store all entries within entries array, + * we will process it after we finish unwind. + */ static int entry(u64 ip, struct unwind_info *ui) { - struct unwind_entry e; + struct unwind_entry *e = &ui->entries[ui->idx++]; struct addr_location al; if (__report_module(&al, ip, ui)) return -1; - e.ip = ip; - e.map = al.map; - e.sym = al.sym; + e->ip = ip; + e->map = al.map; + e->sym = al.sym; pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", al.sym ? al.sym->name : "''", ip, al.map ? al.map->map_ip(al.map, ip) : (u64) 0); - - return ui->cb(&e, ui->arg); + return 0; } static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp) @@ -92,6 +96,16 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, addr, &al); if (!al.map) { + /* + * We've seen cases (softice) where DWARF unwinder went + * through non executable mmaps, which we need to lookup + * in MAP__VARIABLE tree. + */ + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, + MAP__VARIABLE, addr, &al); + } + + if (!al.map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); return -1; } @@ -168,7 +182,7 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct perf_sample *data, int max_stack) { - struct unwind_info ui = { + struct unwind_info *ui, ui_buf = { .sample = data, .thread = thread, .machine = thread->mg->machine, @@ -177,35 +191,54 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, .max_stack = max_stack, }; Dwarf_Word ip; - int err = -EINVAL; + int err = -EINVAL, i; if (!data->user_regs.regs) return -EINVAL; - ui.dwfl = dwfl_begin(&offline_callbacks); - if (!ui.dwfl) + ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack); + if (!ui) + return -ENOMEM; + + *ui = ui_buf; + + ui->dwfl = dwfl_begin(&offline_callbacks); + if (!ui->dwfl) goto out; err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP); if (err) goto out; - err = report_module(ip, &ui); + err = report_module(ip, ui); if (err) goto out; - if (!dwfl_attach_state(ui.dwfl, EM_NONE, thread->tid, &callbacks, &ui)) + if (!dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui)) goto out; - err = dwfl_getthread_frames(ui.dwfl, thread->tid, frame_callback, &ui); + err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui); - if (err && !ui.max_stack) + if (err && !ui->max_stack) err = 0; + /* + * Display what we got based on the order setup. + */ + for (i = 0; i < ui->idx && !err; i++) { + int j = i; + + if (callchain_param.order == ORDER_CALLER) + j = ui->idx - i - 1; + + err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0; + } + out: if (err) pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1)); - dwfl_end(ui.dwfl); + dwfl_end(ui->dwfl); + free(ui); return 0; } diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h index 417a1426f3ad..58328669ed16 100644 --- a/tools/perf/util/unwind-libdw.h +++ b/tools/perf/util/unwind-libdw.h @@ -16,6 +16,8 @@ struct unwind_info { unwind_entry_cb_t cb; void *arg; int max_stack; + int idx; + struct unwind_entry entries[]; }; #endif /* __PERF_UNWIND_LIBDW_H */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index c83832b555e5..ee7e372297e5 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -319,6 +319,15 @@ static struct map *find_map(unw_word_t ip, struct unwind_info *ui) thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); + if (!al.map) { + /* + * We've seen cases (softice) where DWARF unwinder went + * through non executable mmaps, which we need to lookup + * in MAP__VARIABLE tree. + */ + thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, + MAP__VARIABLE, ip, &al); + } return al.map; } @@ -416,20 +425,19 @@ get_proc_name(unw_addr_space_t __maybe_unused as, static int access_dso_mem(struct unwind_info *ui, unw_word_t addr, unw_word_t *data) { - struct addr_location al; + struct map *map; ssize_t size; - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, addr, &al); - if (!al.map) { + map = find_map(addr, ui); + if (!map) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); return -1; } - if (!al.map->dso) + if (!map->dso) return -1; - size = dso__data_read_addr(al.map->dso, al.map, ui->machine, + size = dso__data_read_addr(map->dso, map, ui->machine, addr, (u8 *) data, sizeof(*data)); return !(size == sizeof(*data)); @@ -614,23 +622,48 @@ void unwind__finish_access(struct thread *thread) static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb, void *arg, int max_stack) { + u64 val; + unw_word_t ips[max_stack]; unw_addr_space_t addr_space; unw_cursor_t c; - int ret; - - addr_space = thread__priv(ui->thread); - if (addr_space == NULL) - return -1; + int ret, i = 0; - ret = unw_init_remote(&c, addr_space, ui); + ret = perf_reg_value(&val, &ui->sample->user_regs, PERF_REG_IP); if (ret) - display_error(ret); + return ret; - while (!ret && (unw_step(&c) > 0) && max_stack--) { - unw_word_t ip; + ips[i++] = (unw_word_t) val; - unw_get_reg(&c, UNW_REG_IP, &ip); - ret = ip ? entry(ip, ui->thread, cb, arg) : 0; + /* + * If we need more than one entry, do the DWARF + * unwind itself. + */ + if (max_stack - 1 > 0) { + addr_space = thread__priv(ui->thread); + if (addr_space == NULL) + return -1; + + ret = unw_init_remote(&c, addr_space, ui); + if (ret) + display_error(ret); + + while (!ret && (unw_step(&c) > 0) && i < max_stack) { + unw_get_reg(&c, UNW_REG_IP, &ips[i]); + ++i; + } + + max_stack = i; + } + + /* + * Display what we got based on the order setup. + */ + for (i = 0; i < max_stack && !ret; i++) { + int j = i; + + if (callchain_param.order == ORDER_CALLER) + j = max_stack - i - 1; + ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0; } return ret; @@ -640,24 +673,17 @@ int unwind__get_entries(unwind_entry_cb_t cb, void *arg, struct thread *thread, struct perf_sample *data, int max_stack) { - u64 ip; struct unwind_info ui = { .sample = data, .thread = thread, .machine = thread->mg->machine, }; - int ret; if (!data->user_regs.regs) return -EINVAL; - ret = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP); - if (ret) - return ret; - - ret = entry(ip, thread, cb, arg); - if (ret) - return -ENOMEM; + if (max_stack <= 0) + return -EINVAL; - return --max_stack > 0 ? get_entries(&ui, cb, arg, max_stack) : 0; + return get_entries(&ui, cb, arg, max_stack); } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index cd12c25e4ea4..b7766c577b01 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -3,6 +3,7 @@ #include "debug.h" #include <api/fs/fs.h> #include <sys/mman.h> +#include <sys/utsname.h> #ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #endif @@ -13,14 +14,17 @@ #include <limits.h> #include <byteswap.h> #include <linux/kernel.h> +#include <linux/log2.h> #include <unistd.h> #include "callchain.h" +#include "strlist.h" struct callchain_param callchain_param = { .mode = CHAIN_GRAPH_ABS, .min_percent = 0.5, .order = ORDER_CALLEE, - .key = CCKEY_FUNCTION + .key = CCKEY_FUNCTION, + .value = CCVAL_PERCENT, }; /* @@ -350,41 +354,8 @@ void sighandler_dump_stack(int sig) { psignal(sig, "perf"); dump_stack(); - exit(sig); -} - -void get_term_dimensions(struct winsize *ws) -{ - char *s = getenv("LINES"); - - if (s != NULL) { - ws->ws_row = atoi(s); - s = getenv("COLUMNS"); - if (s != NULL) { - ws->ws_col = atoi(s); - if (ws->ws_row && ws->ws_col) - return; - } - } -#ifdef TIOCGWINSZ - if (ioctl(1, TIOCGWINSZ, ws) == 0 && - ws->ws_row && ws->ws_col) - return; -#endif - ws->ws_row = 25; - ws->ws_col = 80; -} - -void set_term_quiet_input(struct termios *old) -{ - struct termios tc; - - tcgetattr(0, old); - tc = *old; - tc.c_lflag &= ~(ICANON | ECHO); - tc.c_cc[VMIN] = 0; - tc.c_cc[VTIME] = 0; - tcsetattr(0, TCSANOW, &tc); + signal(sig, SIG_DFL); + raise(sig); } int parse_nsec_time(const char *str, u64 *ptime) @@ -537,54 +508,6 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) return ret; } -int filename__read_str(const char *filename, char **buf, size_t *sizep) -{ - size_t size = 0, alloc_size = 0; - void *bf = NULL, *nbf; - int fd, n, err = 0; - char sbuf[STRERR_BUFSIZE]; - - fd = open(filename, O_RDONLY); - if (fd < 0) - return -errno; - - do { - if (size == alloc_size) { - alloc_size += BUFSIZ; - nbf = realloc(bf, alloc_size); - if (!nbf) { - err = -ENOMEM; - break; - } - - bf = nbf; - } - - n = read(fd, bf + size, alloc_size - size); - if (n < 0) { - if (size) { - pr_warning("read failed %d: %s\n", errno, - strerror_r(errno, sbuf, sizeof(sbuf))); - err = 0; - } else - err = -errno; - - break; - } - - size += n; - } while (n > 0); - - if (!err) { - *sizep = size; - *buf = bf; - } else - free(bf); - - close(fd); - return err; -} - const char *get_filename_for_perf_kvm(void) { const char *filename; @@ -665,3 +588,122 @@ bool find_process(const char *name) closedir(dir); return ret ? false : true; } + +int +fetch_kernel_version(unsigned int *puint, char *str, + size_t str_size) +{ + struct utsname utsname; + int version, patchlevel, sublevel, err; + + if (uname(&utsname)) + return -1; + + if (str && str_size) { + strncpy(str, utsname.release, str_size); + str[str_size - 1] = '\0'; + } + + err = sscanf(utsname.release, "%d.%d.%d", + &version, &patchlevel, &sublevel); + + if (err != 3) { + pr_debug("Unablt to get kernel version from uname '%s'\n", + utsname.release); + return -1; + } + + if (puint) + *puint = (version << 16) + (patchlevel << 8) + sublevel; + return 0; +} + +const char *perf_tip(const char *dirpath) +{ + struct strlist *tips; + struct str_node *node; + char *tip = NULL; + struct strlist_config conf = { + .dirname = dirpath, + .file_only = true, + }; + + tips = strlist__new("tips.txt", &conf); + if (tips == NULL) + return errno == ENOENT ? NULL : "Tip: get more memory! ;-p"; + + if (strlist__nr_entries(tips) == 0) + goto out; + + node = strlist__entry(tips, random() % strlist__nr_entries(tips)); + if (asprintf(&tip, "Tip: %s", node->s) < 0) + tip = (char *)"Tip: get more memory! ;-)"; + +out: + strlist__delete(tips); + + return tip; +} + +bool is_regular_file(const char *file) +{ + struct stat st; + + if (stat(file, &st)) + return false; + + return S_ISREG(st.st_mode); +} + +int fetch_current_timestamp(char *buf, size_t sz) +{ + struct timeval tv; + struct tm tm; + char dt[32]; + + if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm)) + return -1; + + if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm)) + return -1; + + scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000); + + return 0; +} + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra) +{ + size_t i, j, mask; + + if (!printer) + return; + + bytes_per_line = roundup_pow_of_two(bytes_per_line); + mask = bytes_per_line - 1; + + printer(BINARY_PRINT_DATA_BEGIN, 0, extra); + for (i = 0; i < len; i++) { + if ((i & mask) == 0) { + printer(BINARY_PRINT_LINE_BEGIN, -1, extra); + printer(BINARY_PRINT_ADDR, i, extra); + } + + printer(BINARY_PRINT_NUM_DATA, data[i], extra); + + if (((i & mask) == mask) || i == len - 1) { + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_NUM_PAD, -1, extra); + + printer(BINARY_PRINT_SEP, i, extra); + for (j = i & ~mask; j <= i; j++) + printer(BINARY_PRINT_CHAR_DATA, data[j], extra); + for (j = 0; j < mask-(i & mask); j++) + printer(BINARY_PRINT_CHAR_PAD, i, extra); + printer(BINARY_PRINT_LINE_END, -1, extra); + } + } + printer(BINARY_PRINT_DATA_END, -1, extra); +} diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 4cfb913aa9e0..d0d50cef8b2a 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -53,6 +53,7 @@ #include <stdlib.h> #include <stdarg.h> #include <string.h> +#include <term.h> #include <errno.h> #include <limits.h> #include <sys/param.h> @@ -81,6 +82,8 @@ extern const char *graph_line; extern const char *graph_dotted_line; +extern const char *spaces; +extern const char *dots; extern char buildid_dir[]; /* On most systems <limits.h> would have given us this, but @@ -150,12 +153,6 @@ extern void set_warning_routine(void (*routine)(const char *err, va_list params) extern int prefixcmp(const char *str, const char *prefix); extern void set_buildid_dir(const char *dir); -static inline const char *skip_prefix(const char *str, const char *prefix) -{ - size_t len = strlen(prefix); - return strncmp(str, prefix, len) ? NULL : str + len; -} - #ifdef __GLIBC_PREREQ #if __GLIBC_PREREQ(2, 1) #define HAVE_STRCHRNUL @@ -186,14 +183,6 @@ static inline void *zalloc(size_t size) #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) -static inline int has_extension(const char *filename, const char *ext) -{ - size_t len = strlen(filename); - size_t extlen = strlen(ext); - - return len > extlen && !memcmp(filename + len - extlen, ext, extlen); -} - /* Sane ctype - no locale, and works with signed chars */ #undef isascii #undef isspace @@ -282,9 +271,6 @@ void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; -void get_term_dimensions(struct winsize *ws); -void set_term_quiet_input(struct termios *old); - struct parse_tag { char tag; int mult; @@ -319,7 +305,6 @@ char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, bool show_sym, bool unwind_inlines); void free_srcline(char *srcline); -int filename__read_str(const char *filename, char **buf, size_t *sizep); int perf_event_paranoid(void); void mem_bswap_64(void *src, int byte_size); @@ -350,4 +335,36 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int int get_stack_size(const char *str, unsigned long *_size); +int fetch_kernel_version(unsigned int *puint, + char *str, size_t str_sz); +#define KVER_VERSION(x) (((x) >> 16) & 0xff) +#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff) +#define KVER_SUBLEVEL(x) ((x) & 0xff) +#define KVER_FMT "%d.%d.%d" +#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) + +const char *perf_tip(const char *dirpath); +bool is_regular_file(const char *file); +int fetch_current_timestamp(char *buf, size_t sz); + +enum binary_printer_ops { + BINARY_PRINT_DATA_BEGIN, + BINARY_PRINT_LINE_BEGIN, + BINARY_PRINT_ADDR, + BINARY_PRINT_NUM_DATA, + BINARY_PRINT_NUM_PAD, + BINARY_PRINT_SEP, + BINARY_PRINT_CHAR_DATA, + BINARY_PRINT_CHAR_PAD, + BINARY_PRINT_LINE_END, + BINARY_PRINT_DATA_END, +}; + +typedef void (*print_binary_t)(enum binary_printer_ops, + unsigned int val, + void *extra); + +void print_binary(unsigned char *data, size_t len, + size_t bytes_per_line, print_binary_t printer, + void *extra); #endif /* GIT_COMPAT_UTIL_H */ |