diff options
Diffstat (limited to 'tools/perf')
309 files changed, 14031 insertions, 4656 deletions
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index db11478e30b4..ac841bc5c35b 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -47,7 +47,8 @@ man5dir=$(mandir)/man5 man7dir=$(mandir)/man7 ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe +ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf +ASCIIDOC_HTML = xhtml11 MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install @@ -55,6 +56,14 @@ RM ?= rm -f DOC_REF = origin/man HTML_REF = origin/html +ifdef USE_ASCIIDOCTOR +ASCIIDOC = asciidoctor +ASCIIDOC_EXTRA = -a compat-mode +ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions +ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual" +ASCIIDOC_HTML = xhtml5 +endif + infodir?=$(prefix)/share/info MAKEINFO=makeinfo INSTALL_INFO=install-info @@ -73,10 +82,12 @@ ifeq ($(_tmp_tool_path),) missing_tools = $(ASCIIDOC) endif +ifndef USE_ASCIIDOCTOR _tmp_tool_path := $(call get-executable,$(XMLTO)) ifeq ($(_tmp_tool_path),) missing_tools += $(XMLTO) endif +endif # # For asciidoc ... @@ -264,9 +275,17 @@ clean: $(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ + $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ + $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ + mv $@+ $@ + +ifdef USE_ASCIIDOCTOR +$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt + $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ + $(ASCIIDOC) -b manpage -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ mv $@+ $@ +endif $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(QUIET_XMLTO)$(RM) $@ && \ @@ -274,7 +293,7 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ + $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ mv $@+ $@ @@ -321,13 +340,13 @@ howto-index.txt: howto-index.sh $(wildcard howto/*.txt) mv $@+ $@ $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt - $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt + $(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt WEBDOC_DEST = /pub/software/tools/perf/docs $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ + sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \ mv $@+ $@ # UNIMPLEMENTED diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb new file mode 100644 index 000000000000..d148fe95c0c4 --- /dev/null +++ b/tools/perf/Documentation/asciidoctor-extensions.rb @@ -0,0 +1,29 @@ +require 'asciidoctor' +require 'asciidoctor/extensions' + +module Perf + module Documentation + class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor + use_dsl + + named :chrome + + def process(parent, target, attrs) + if parent.document.basebackend? 'html' + %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n) + elsif parent.document.basebackend? 'manpage' + "#{target}(#{attrs[1]})" + elsif parent.document.basebackend? 'docbook' + "<citerefentry>\n" \ + "<refentrytitle>#{target}</refentrytitle>" \ + "<manvolnum>#{attrs[1]}</manvolnum>\n" \ + "</citerefentry>\n" + end + end + end + end +end + +Asciidoctor::Extensions.register do + inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf +end diff --git a/tools/perf/Documentation/build-xed.txt b/tools/perf/Documentation/build-xed.txt new file mode 100644 index 000000000000..6222c1e7231f --- /dev/null +++ b/tools/perf/Documentation/build-xed.txt @@ -0,0 +1,19 @@ + +For --xed the xed tool is needed. Here is how to install it: + + $ git clone https://github.com/intelxed/mbuild.git mbuild + $ git clone https://github.com/intelxed/xed + $ cd xed + $ ./mfile.py --share + $ ./mfile.py examples + $ sudo ./mfile.py --prefix=/usr/local install + $ sudo ldconfig + $ sudo cp obj/examples/xed /usr/local/bin + +Basic xed testing: + + $ xed | head -3 + ERROR: required argument(s) were missing + Copyright (C) 2017, Intel Corporation. All rights reserved. + XED version: [v10.0-328-g7d62c8c49b7b] + $ diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index 76971d2e4164..115eaacc455f 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -106,7 +106,7 @@ in transaction, respectively. While it is possible to create scripts to analyze the data, an alternative approach is available to export the data to a sqlite or postgresql database. Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, -and to script call-graph-from-sql.py for an example of using the database. +and to script exported-sql-viewer.py for an example of using the database. There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index a3abe04c779d..c2182cbabde3 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -11,10 +11,11 @@ l synthesize last branch entries (use with i or x) s skip initial number of events - The default is all events i.e. the same as --itrace=ibxwpe + The default is all events i.e. the same as --itrace=ibxwpe, + except for perf script where it is --itrace=ce - In addition, the period (default 100000) for instructions events - can be specified in units of: + In addition, the period (default 100000, except for perf script where it is 1) + for instructions events can be specified in units of: i instructions t ticks diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 749cc6055dac..e8c972f89357 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -118,6 +118,15 @@ OPTIONS --group:: Show event group information together +--percent-type:: + Set annotation percent type from following choices: + global-period, local-period, global-hits, local-hits + + The local/global keywords set if the percentage is computed + in the scope of the function (local) or the whole data (global). + The period/hits keywords set the base the percentage is computed + on - the samples period or the number of samples (hits). + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 73c2650bd0db..f6de0952ff3c 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -48,6 +48,9 @@ OPTIONS --purge=:: Purge all cached binaries including older caches which have specified path from the cache. +-P:: +--purge-all:: + Purge all cached binaries. This will flush out entire cache. -M:: --missing=:: List missing build ids in the cache for the specified file. @@ -59,7 +62,9 @@ OPTIONS exactly same build-id, that is replaced by new one. It can be used to update kallsyms and kernel dso to vmlinux in order to support annotation. - +-l:: +--list:: + List all valid binaries from cache. -v:: --verbose:: Be more verbose. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 2549c34a7895..667c14e56031 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -18,6 +18,10 @@ various perf commands with the -e option. OPTIONS ------- +-d:: +--desc:: +Print extra event descriptions. (default) + --no-desc:: Don't print descriptions. @@ -25,11 +29,13 @@ Don't print descriptions. --long-desc:: Print longer event descriptions. +--debug:: +Enable debugging output. + --details:: Print how named events are resolved internally into perf events, and also any extra expressions computed by perf stat. - [[EVENT_MODIFIERS]] EVENT MODIFIERS --------------- @@ -49,7 +55,6 @@ counted. The following modifiers exist: S - read sample value (PERF_SAMPLE_READ) D - pin the event to the PMU W - group is weak and will fallback to non-group if not schedulable, - only supported in 'perf stat' for now. The 'p' modifier can be used for specifying how precise the instruction address should be. The 'p' modifier can be specified multiple times: @@ -124,7 +129,11 @@ The available PMUs and their raw parameters can be listed with For example the raw event "LSD.UOPS" core pmu event above could be specified as - perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=1/ ... + perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=0x1/ ... + + or using extended name syntax + + perf stat -e cpu/event=0xa8,umask=0x1,cmask=0x1,name=\'LSD.UOPS_CYCLES:cmask=0x1\'/ ... PER SOCKET PMUS --------------- @@ -230,7 +239,7 @@ perf also supports group leader sampling using the :S specifier. perf record -e '{cycles,instructions}:S' ... perf report --group -Normally all events in a event group sample, but with :S only +Normally all events in an event group sample, but with :S only the first event (the leader) samples, and it only reads the values of the other events in the group. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index cc37b3a4be76..246dee081efd 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -57,6 +57,9 @@ OPTIONS FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and "no" for disable callgraph. - 'stack-size': user stack size for dwarf mode + - 'name' : User defined event name. Single quotes (') may be used to + escape symbols in the name from parsing by shell and tool + like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'. See the linkperf:perf-list[1] man page for more parameters. @@ -91,7 +94,7 @@ OPTIONS "perf report" to view group events together. --filter=<filter>:: - Event filter. This option should follow a event selector (-e) which + Event filter. This option should follow an event selector (-e) which selects either tracepoint event(s) or a hardware trace PMU (e.g. Intel PT or CoreSight). @@ -150,7 +153,7 @@ OPTIONS --exclude-perf:: Don't record events issued by perf itself. This option should follow - a event selector (-e) which selects tracepoint event(s). It adds a + an event selector (-e) which selects tracepoint event(s). It adds a filter expression 'common_pid != $PERFPID' to filters. If other '--filter' exists, the new filter expression will be combined with them by '&&'. diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 917e36fde6d8..474a4941f65d 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -477,6 +477,15 @@ include::itrace.txt[] Display monitored tasks stored in perf data. Displaying pid/tid/ppid plus the command string aligned to distinguish parent and child tasks. +--percent-type:: + Set annotation percent type from following choices: + global-period, local-period, global-hits, local-hits + + The local/global keywords set if the percentage is computed + in the scope of the function (local) or the whole data (global). + The period/hits keywords set the base the percentage is computed + on - the samples period or the number of samples (hits). + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 51ec2d20068a..0fb9eda3cbca 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -610,6 +610,32 @@ Various utility functions for use with perf script: nsecs_str(nsecs) - returns printable string in the form secs.nsecs avg(total, n) - returns average given a sum and a total number of values +SUPPORTED FIELDS +---------------- + +Currently supported fields: + +ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr, +symbol, dso, time_enabled, time_running, values, callchain, +brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs, +weight, transaction, raw_buf, attr. + +Some fields have sub items: + +brstack: + from, to, from_dsoname, to_dsoname, mispred, + predicted, in_tx, abort, cycles. + +brstacksym: + items: from, to, pred, in_tx, abort (converted string) + +For example, +We can use this code to print brstack "from", "to", "cycles". + +if 'brstack' in dict: + for entry in dict['brstack']: + print "from %s, to %s, cycles %s" % (entry["from"], entry["to"], entry["cycles"]) + SEE ALSO -------- linkperf:perf-script[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index afdafe2110a1..a2b37ce48094 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -383,6 +383,24 @@ include::itrace.txt[] will be printed. Each entry has function name and file/line. Enabled by default, disable with --no-inline. +--insn-trace:: + Show instruction stream for intel_pt traces. Combine with --xed to + show disassembly. + +--xed:: + Run xed disassembler on output. Requires installing the xed disassembler. + +--call-trace:: + Show call stream for intel_pt traces. The CPUs are interleaved, but + can be filtered with -C. + +--call-ret-trace:: + Show call and return stream for intel_pt traces. + +--graph-function:: + For itrace only show specified functions and their callees for + itrace. Multiple functions can be separated by comma. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index e6c3b4e555c2..b10a90b6a718 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -116,6 +116,22 @@ Do not aggregate counts across all monitored CPUs. print counts using a CSV-style output to make it easy to import directly into spreadsheets. Columns are separated by the string specified in SEP. +--table:: Display time for each run (-r option), in a table format, e.g.: + + $ perf stat --null -r 5 --table perf bench sched pipe + + Performance counter stats for 'perf bench sched pipe' (5 runs): + + # Table of individual measurements: + 5.189 (-0.293) # + 5.189 (-0.294) # + 5.186 (-0.296) # + 5.663 (+0.181) ## + 6.186 (+0.703) #### + + # Final result: + 5.483 +- 0.198 seconds time elapsed ( +- 3.62% ) + -G name:: --cgroup name:: monitor only in the container (cgroup) called "name". This option is available only @@ -162,6 +178,9 @@ Print count deltas for fixed number of times. This option should be used together with "-I" option. example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' +--interval-clear:: +Clear the screen before next interval. + --timeout msecs:: Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms). This option is not supported with the "-I" option. @@ -294,20 +313,38 @@ Users who wants to get the actual value can apply --no-metric-only. EXAMPLES -------- -$ perf stat -- make -j +$ perf stat -- make + + Performance counter stats for 'make': + + 83723.452481 task-clock:u (msec) # 1.004 CPUs utilized + 0 context-switches:u # 0.000 K/sec + 0 cpu-migrations:u # 0.000 K/sec + 3,228,188 page-faults:u # 0.039 M/sec + 229,570,665,834 cycles:u # 2.742 GHz + 313,163,853,778 instructions:u # 1.36 insn per cycle + 69,704,684,856 branches:u # 832.559 M/sec + 2,078,861,393 branch-misses:u # 2.98% of all branches + + 83.409183620 seconds time elapsed + + 74.684747000 seconds user + 8.739217000 seconds sys + +TIMINGS +------- +As displayed in the example above we can display 3 types of timings. +We always display the time the counters were enabled/alive: + + 83.409183620 seconds time elapsed - Performance counter stats for 'make -j': +For workload sessions we also display time the workloads spent in +user/system lands: - 8117.370256 task clock ticks # 11.281 CPU utilization factor - 678 context switches # 0.000 M/sec - 133 CPU migrations # 0.000 M/sec - 235724 pagefaults # 0.029 M/sec - 24821162526 CPU cycles # 3057.784 M/sec - 18687303457 instructions # 2302.138 M/sec - 172158895 cache references # 21.209 M/sec - 27075259 cache misses # 3.335 M/sec + 74.684747000 seconds user + 8.739217000 seconds sys - Wall-clock time elapsed: 719.554352 msecs +Those times are the very same as displayed by the 'time' tool. CSV FORMAT ---------- diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 114fda12aa49..808b664343c9 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -242,6 +242,16 @@ Default is to monitor all CPUS. --hierarchy:: Enable hierarchy output. +--overwrite:: + Enable this to use just the most recent records, which helps in high core count + machines such as Knights Landing/Mill, but right now is disabled by default as + the pausing used in this technique is leading to loss of metadata events such + as PERF_RECORD_MMAP which makes 'perf top' unable to resolve samples, leading + to lots of unknown samples appearing on the UI. Enable this if you are in such + machines and profiling a workload that doesn't creates short lived threads and/or + doesn't uses many executable mmap operations. Work is being planed to solve + this situation, till then, this will remain disabled by default. + --force:: Don't do ownership validation. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 115db9e06ecd..e113450503d2 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -171,6 +171,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --kernel-syscall-graph:: Show the kernel callchains on the syscall exit path. +--max-events=N:: + Stop after processing N events. Note that strace-like events are considered + only at exit time or when a syscall is interrupted, i.e. in those cases this + option is equivalent to the number of lines printed. + --max-stack:: Set the stack depth limit when parsing the callchain, anything beyond the specified depth will be ignored. Note that at this point @@ -238,6 +243,68 @@ Trace syscalls, major and minor pagefaults: As you can see, there was major pagefault in python process, from CRYPTO_push_info_ routine which faulted somewhere in libcrypto.so. +Trace the first 4 open, openat or open_by_handle_at syscalls (in the future more syscalls may match here): + + $ perf trace -e open* --max-events 4 + [root@jouet perf]# trace -e open* --max-events 4 + 2272.992 ( 0.037 ms): gnome-shell/1370 openat(dfd: CWD, filename: /proc/self/stat) = 31 + 2277.481 ( 0.139 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 + 3026.398 ( 0.076 ms): gnome-shell/3039 openat(dfd: CWD, filename: /proc/self/stat) = 65 + 4294.665 ( 0.015 ms): sed/15879 openat(dfd: CWD, filename: /etc/ld.so.cache, flags: CLOEXEC) = 3 + $ + +Trace the first minor page fault when running a workload: + + # perf trace -F min --max-stack=7 --max-events 1 sleep 1 + 0.000 ( 0.000 ms): sleep/18006 minfault [__clear_user+0x1a] => 0x5626efa56080 (?k) + __clear_user ([kernel.kallsyms]) + load_elf_binary ([kernel.kallsyms]) + search_binary_handler ([kernel.kallsyms]) + __do_execve_file.isra.33 ([kernel.kallsyms]) + __x64_sys_execve ([kernel.kallsyms]) + do_syscall_64 ([kernel.kallsyms]) + entry_SYSCALL_64 ([kernel.kallsyms]) + # + +Trace the next min page page fault to take place on the first CPU: + + # perf trace -F min --call-graph=dwarf --max-events 1 --cpu 0 + 0.000 ( 0.000 ms): Web Content/17136 minfault [js::gc::Chunk::fetchNextDecommittedArena+0x4b] => 0x7fbe6181b000 (?.) + js::gc::FreeSpan::initAsEmpty (inlined) + js::gc::Arena::setAsNotAllocated (inlined) + js::gc::Chunk::fetchNextDecommittedArena (/usr/lib64/firefox/libxul.so) + js::gc::Chunk::allocateArena (/usr/lib64/firefox/libxul.so) + js::gc::GCRuntime::allocateArena (/usr/lib64/firefox/libxul.so) + js::gc::ArenaLists::allocateFromArena (/usr/lib64/firefox/libxul.so) + js::gc::GCRuntime::tryNewTenuredThing<JSString, (js::AllowGC)1> (inlined) + js::AllocateString<JSString, (js::AllowGC)1> (/usr/lib64/firefox/libxul.so) + js::Allocate<JSThinInlineString, (js::AllowGC)1> (inlined) + JSThinInlineString::new_<(js::AllowGC)1> (inlined) + AllocateInlineString<(js::AllowGC)1, unsigned char> (inlined) + js::ConcatStrings<(js::AllowGC)1> (/usr/lib64/firefox/libxul.so) + [0x18b26e6bc2bd] (/tmp/perf-17136.map) + # + +Trace the next two sched:sched_switch events, four block:*_plug events, the +next block:*_unplug and the next three net:*dev_queue events, this last one +with a backtrace of at most 16 entries, system wide: + + # perf trace -e sched:*switch/nr=2/,block:*_plug/nr=4/,block:*_unplug/nr=1/,net:*dev_queue/nr=3,max-stack=16/ + 0.000 :0/0 sched:sched_switch:swapper/2:0 [120] S ==> rcu_sched:10 [120] + 0.015 rcu_sched/10 sched:sched_switch:rcu_sched:10 [120] R ==> swapper/2:0 [120] + 254.198 irq/50-iwlwifi/680 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=66 + __dev_queue_xmit ([kernel.kallsyms]) + 273.977 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051f600 len=78 + __dev_queue_xmit ([kernel.kallsyms]) + 274.007 :0/0 net:net_dev_queue:dev=wlp3s0 skbaddr=0xffff93498051ff00 len=78 + __dev_queue_xmit ([kernel.kallsyms]) + 2930.140 kworker/u16:58/2722 block:block_plug:[kworker/u16:58] + 2930.162 kworker/u16:58/2722 block:block_unplug:[kworker/u16:58] 1 + 4466.094 jbd2/dm-2-8/748 block:block_plug:[jbd2/dm-2-8] + 8050.123 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] + 8050.271 kworker/u16:30/2694 block:block_plug:[kworker/u16:30] + # + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 225454416ed5..7902a5681fc8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -84,10 +84,10 @@ endif # has_clean endif # MAKECMDGOALS # -# The clean target is not really parallel, don't print the jobs info: +# Explicitly disable parallelism for the clean target. # clean: - $(make) + $(make) -j1 # # The build-test target is not really parallel, don't print the jobs info, diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index ae7dc46e8f8a..a0e8c23f9125 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -54,6 +54,8 @@ endif ifeq ($(SRCARCH),arm64) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -I$(OUTPUT)arch/arm64/include/generated LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif @@ -207,8 +209,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null) PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil - PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) + PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -298,6 +299,11 @@ ifndef NO_BIONIC endif endif +ifeq ($(feature-get_current_dir_name), 1) + CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME +endif + + ifdef NO_LIBELF NO_DWARF := 1 NO_DEMANGLE := 1 @@ -832,7 +838,7 @@ ifndef NO_JVMTI JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}') else ifneq (,$(wildcard /usr/sbin/alternatives)) - JDIR=$(shell alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') + JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed 's%/jre/bin/java.%%g') endif endif ifndef JDIR @@ -885,6 +891,8 @@ endif # Among the variables below, these: # perfexecdir +# perf_include_dir +# perf_examples_dir # template_dir # mandir # infodir @@ -904,6 +912,8 @@ bindir = $(abspath $(prefix)/$(bindir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core +perf_include_dir = lib/perf/include +perf_examples_dir = lib/perf/examples sharedir = $(prefix)/share template_dir = share/perf-core/templates STRACE_GROUPS_DIR = share/perf-core/strace/groups @@ -934,6 +944,8 @@ bindir_SQ = $(subst ','\'',$(bindir)) mandir_SQ = $(subst ','\'',$(mandir)) infodir_SQ = $(subst ','\'',$(infodir)) perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) +perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir)) +perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) tipdir_SQ = $(subst ','\'',$(tipdir)) @@ -944,14 +956,20 @@ srcdir_SQ = $(subst ','\'',$(srcdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) +perf_include_instdir = $(perf_include_dir) +perf_examples_instdir = $(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) tip_instdir = $(tipdir) else perfexec_instdir = $(prefix)/$(perfexecdir) +perf_include_instdir = $(prefix)/$(perf_include_dir) +perf_examples_instdir = $(prefix)/$(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) tip_instdir = $(prefix)/$(tipdir) endif perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) +perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir)) +perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir)) STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) tip_instdir_SQ = $(subst ','\'',$(tip_instdir)) @@ -999,6 +1017,8 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) +$(call detected_var,perf_include_dir_SQ) +$(call detected_var,perf_examples_dir_SQ) $(call detected_var,tipdir_SQ) $(call detected_var,srcdir_SQ) $(call detected_var,LIBDIR) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 83e453de36f8..d95655489f7e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1,4 +1,5 @@ include ../scripts/Makefile.include +include ../scripts/Makefile.arch # The default target of this Makefile is... all: @@ -384,6 +385,10 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) +linux_uapi_dir := $(srctree)/tools/include/uapi/linux +asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic +arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/ + beauty_outdir := $(OUTPUT)trace/beauty/generated beauty_ioctl_outdir := $(beauty_outdir)/ioctl drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c @@ -431,6 +436,12 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh $(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl) $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@ +socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c +socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh + +$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl) + $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@ + vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -452,6 +463,18 @@ madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl) $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@ +mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c +mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh + +$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(arch_asm_uapi_dir)/mman.h $(mmap_flags_tbl) + $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ + +mount_flags_array := $(beauty_outdir)/mount_flags_array.c +mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh + +$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl) + $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@ + prctl_option_array := $(beauty_outdir)/prctl_option_array.c prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh @@ -566,8 +589,11 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(sndrv_ctl_ioctl_array) \ $(kcmp_type_array) \ $(kvm_ioctl_array) \ + $(socket_ipproto_array) \ $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ + $(mmap_flags_array) \ + $(mount_flags_array) \ $(perf_ioctl_array) \ $(prctl_option_array) \ $(arch_errno_name_array) @@ -626,7 +652,7 @@ $(LIBPERF_IN): prepare FORCE $(LIB_FILE): $(LIBPERF_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a @@ -767,6 +793,16 @@ ifndef NO_JVMTI endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' +ifndef NO_LIBBPF + $(call QUIET_INSTALL, bpf-headers) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \ + $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ + $(INSTALL) include/bpf/linux/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux' + $(call QUIET_INSTALL, bpf-examples) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ + $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' +endif $(call QUIET_INSTALL, perf-archive) \ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' $(call QUIET_INSTALL, perf-with-kcore) \ @@ -844,12 +880,15 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(madvise_behavior_array) \ + $(OUTPUT)$(mmap_flags_array) \ + $(OUTPUT)$(mount_flags_array) \ $(OUTPUT)$(drm_ioctl_array) \ $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ $(OUTPUT)$(kcmp_type_array) \ + $(OUTPUT)$(socket_ipproto_array) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ $(OUTPUT)$(prctl_option_array) \ diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c index 8cb347760233..9a0242e74cfc 100644 --- a/tools/perf/arch/arm/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm/tests/dwarf-unwind.c @@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_ARM_SP]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 91de4860faad..dbef716a1913 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -4,3 +4,25 @@ PERF_HAVE_DWARF_REGS := 1 endif PERF_HAVE_JITDUMP := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/arm64/include/generated/asm +header := $(out)/syscalls.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, arm64) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index 6688977e4ac7..76c6345a57d5 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -8,6 +8,63 @@ struct arm64_annotate { jump_insn; }; +static int arm64_mov__parse(struct arch *arch __maybe_unused, + struct ins_operands *ops, + struct map_symbol *ms __maybe_unused) +{ + char *s = strchr(ops->raw, ','), *target, *endptr; + + if (s == NULL) + return -1; + + *s = '\0'; + ops->source.raw = strdup(ops->raw); + *s = ','; + + if (ops->source.raw == NULL) + return -1; + + target = ++s; + ops->target.raw = strdup(target); + if (ops->target.raw == NULL) + goto out_free_source; + + ops->target.addr = strtoull(target, &endptr, 16); + if (endptr == target) + goto out_free_target; + + s = strchr(endptr, '<'); + if (s == NULL) + goto out_free_target; + endptr = strchr(s + 1, '>'); + if (endptr == NULL) + goto out_free_target; + + *endptr = '\0'; + *s = ' '; + ops->target.name = strdup(s); + *s = '<'; + *endptr = '>'; + if (ops->target.name == NULL) + goto out_free_target; + + return 0; + +out_free_target: + zfree(&ops->target.raw); +out_free_source: + zfree(&ops->source.raw); + return -1; +} + +static int mov__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops); + +static struct ins_ops arm64_mov_ops = { + .parse = arm64_mov__parse, + .scnprintf = mov__scnprintf, +}; + static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name) { struct arm64_annotate *arm = arch->priv; @@ -21,7 +78,7 @@ static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const else if (!strcmp(name, "ret")) ops = &ret_ops; else - return NULL; + ops = &arm64_mov_ops; arch__associate_ins_ops(arch, name, ops); return ops; diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..c88fd32563eb --- /dev/null +++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl @@ -0,0 +1,62 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> +# Changed by: Kim Phillips <kim.phillips@arm.com> + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table_from_c() +{ + local sc nr last_sc + + create_table_exe=`mktemp ${TMPDIR:-/tmp}/create-table-XXXXXX` + + { + + cat <<-_EoHEADER + #include <stdio.h> + #include "$input" + int main(int argc, char *argv[]) + { + _EoHEADER + + while read sc nr; do + printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", __NR_$sc);" + last_sc=$sc + done + + printf "%s\n" " printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);" + printf "}\n" + + } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c - + + $create_table_exe + + rm -f $create_table_exe +} + +create_table() +{ + echo "static const char *syscalltbl_arm64[] = {" + create_table_from_c + echo "};" +} + +$gcc -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c index e907f0f4c20c..5522ce384723 100644 --- a/tools/perf/arch/arm64/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -25,7 +25,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_ARM64_SP]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 1120e39c1b00..5ccfce87e693 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -194,6 +194,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err, sper->itr.read_finish = arm_spe_read_finish; sper->itr.alignment = 0; + *err = 0; return &sper->itr; } diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index c6f373508a4f..82657c01a3b8 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -189,7 +189,7 @@ out_error: return -1; } -int perf_env__lookup_objdump(struct perf_env *env) +int perf_env__lookup_objdump(struct perf_env *env, const char **path) { /* * For live mode, env->arch will be NULL and we can use @@ -198,5 +198,5 @@ int perf_env__lookup_objdump(struct perf_env *env) if (env->arch == NULL) return 0; - return perf_env__lookup_binutils_path(env, "objdump", &objdump_path); + return perf_env__lookup_binutils_path(env, "objdump", path); } diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index 2d875baa92e6..2167001b18c5 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -4,8 +4,6 @@ #include "../util/env.h" -extern const char *objdump_path; - -int perf_env__lookup_objdump(struct perf_env *env); +int perf_env__lookup_objdump(struct perf_env *env, const char **path); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 30cbbd6d5be0..5f39efef0856 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c @@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_POWERPC_R1]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h index 853b95d1e139..2011376c7ab5 100644 --- a/tools/perf/arch/powerpc/util/book3s_hv_exits.h +++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h @@ -15,7 +15,6 @@ {0x400, "INST_STORAGE"}, \ {0x480, "INST_SEGMENT"}, \ {0x500, "EXTERNAL"}, \ - {0x501, "EXTERNAL_LEVEL"}, \ {0x502, "EXTERNAL_HV"}, \ {0x600, "ALIGNMENT"}, \ {0x700, "PROGRAM"}, \ diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 0c370f81e002..7c6eeb4633fe 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -58,9 +58,13 @@ static int check_return_reg(int ra_regno, Dwarf_Frame *frame) } /* - * Check if return address is on the stack. + * Check if return address is on the stack. If return address + * is in a register (typically R0), it is yet to be saved on + * the stack. */ - if (nops != 0 || ops != NULL) + if ((nops != 0 || ops != NULL) && + !(nops == 1 && ops[0].atom == DW_OP_regx && + ops[0].number2 == 0 && ops[0].offset == 0)) return 0; /* @@ -243,13 +247,12 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) u64 ip; u64 skip_slot = -1; - if (chain->nr < 3) + if (!chain || chain->nr < 3) return skip_slot; - ip = chain->ips[2]; + ip = chain->ips[1]; - thread__find_addr_location(thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); + thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al); if (al.map) dso = al.map->dso; diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 53d83d7e6a09..10a44e946f77 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -22,15 +22,16 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) #endif -#if !defined(_CALL_ELF) || _CALL_ELF != 2 int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb __maybe_unused) { char *sym = syma->name; +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Skip over any initial dot */ if (*sym == '.') sym++; +#endif /* Avoid "SyS" kernel syscall aliases */ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3)) @@ -41,6 +42,7 @@ int arch__choose_best_symbol(struct symbol *syma, return SYMBOL_A; } +#if !defined(_CALL_ELF) || _CALL_ELF != 2 /* Allow matching against dot variants */ int arch__compare_symbol_names(const char *namea, const char *nameb) { @@ -141,8 +143,10 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, for (i = 0; i < ntevs; i++) { tev = &pev->tevs[i]; map__for_each_symbol(map, sym, tmp) { - if (map->unmap_ip(map, sym->start) == tev->point.address) + if (map->unmap_ip(map, sym->start) == tev->point.address) { arch__fix_tev_from_maps(pev, tev, map, sym); + break; + } } } } diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index cee4e2f7c057..de0dd66dbb48 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -100,8 +100,6 @@ out_free_source: return -1; } -static int mov__scnprintf(struct ins *ins, char *bf, size_t size, - struct ins_operands *ops); static struct ins_ops s390_mov_ops = { .parse = s390_mov__parse, diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c index 3afe8256eff2..44c857388897 100644 --- a/tools/perf/arch/s390/util/auxtrace.c +++ b/tools/perf/arch/s390/util/auxtrace.c @@ -30,6 +30,7 @@ cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused, struct auxtrace_info_event *auxtrace_info __maybe_unused, size_t priv_size __maybe_unused) { + auxtrace_info->type = PERF_AUXTRACE_S390_CPUMSF; return 0; } diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index d233e2eb9592..aaabab5e2830 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c @@ -102,7 +102,7 @@ const char * const kvm_skip_events[] = { int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid) { - if (strstr(cpuid, "IBM/S390")) { + if (strstr(cpuid, "IBM")) { kvm->exit_reasons = sie_exit_reasons; kvm->exit_reasons_isa = "SIE"; } else diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 7fbca175099e..275dea7ff59a 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,3 +1,5 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 endif + +PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c new file mode 100644 index 000000000000..2614c010c235 --- /dev/null +++ b/tools/perf/arch/sparc/annotate/instructions.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 + +static int is_branch_cond(const char *cond) +{ + if (cond[0] == '\0') + return 1; + + if (cond[0] == 'a' && cond[1] == '\0') + return 1; + + if (cond[0] == 'c' && + (cond[1] == 'c' || cond[1] == 's') && + cond[2] == '\0') + return 1; + + if (cond[0] == 'e' && + (cond[1] == '\0' || + (cond[1] == 'q' && cond[2] == '\0'))) + return 1; + + if (cond[0] == 'g' && + (cond[1] == '\0' || + (cond[1] == 't' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'l' && + (cond[1] == '\0' || + (cond[1] == 't' && cond[2] == '\0') || + (cond[1] == 'u' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'u' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'n' && + (cond[1] == '\0' || + (cond[1] == 'e' && cond[2] == '\0') || + (cond[1] == 'z' && cond[2] == '\0') || + (cond[1] == 'e' && cond[2] == 'g' && cond[3] == '\0'))) + return 1; + + if (cond[0] == 'b' && + cond[1] == 'p' && + cond[2] == 'o' && + cond[3] == 's' && + cond[4] == '\0') + return 1; + + if (cond[0] == 'v' && + (cond[1] == 'c' || cond[1] == 's') && + cond[2] == '\0') + return 1; + + if (cond[0] == 'b' && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + return 0; +} + +static int is_branch_reg_cond(const char *cond) +{ + if ((cond[0] == 'n' || cond[0] == 'l') && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + if (cond[0] == 'z' && + cond[1] == '\0') + return 1; + + if ((cond[0] == 'g' || cond[0] == 'l') && + cond[1] == 'e' && + cond[2] == 'z' && + cond[3] == '\0') + return 1; + + if (cond[0] == 'g' && + cond[1] == 'z' && + cond[2] == '\0') + return 1; + + return 0; +} + +static int is_branch_float_cond(const char *cond) +{ + if (cond[0] == '\0') + return 1; + + if ((cond[0] == 'a' || cond[0] == 'e' || + cond[0] == 'z' || cond[0] == 'g' || + cond[0] == 'l' || cond[0] == 'n' || + cond[0] == 'o' || cond[0] == 'u') && + cond[1] == '\0') + return 1; + + if (((cond[0] == 'g' && cond[1] == 'e') || + (cond[0] == 'l' && (cond[1] == 'e' || + cond[1] == 'g')) || + (cond[0] == 'n' && (cond[1] == 'e' || + cond[1] == 'z')) || + (cond[0] == 'u' && (cond[1] == 'e' || + cond[1] == 'g' || + cond[1] == 'l'))) && + cond[2] == '\0') + return 1; + + if (cond[0] == 'u' && + (cond[1] == 'g' || cond[1] == 'l') && + cond[2] == 'e' && + cond[3] == '\0') + return 1; + + return 0; +} + +static struct ins_ops *sparc__associate_instruction_ops(struct arch *arch, const char *name) +{ + struct ins_ops *ops = NULL; + + if (!strcmp(name, "call") || + !strcmp(name, "jmp") || + !strcmp(name, "jmpl")) { + ops = &call_ops; + } else if (!strcmp(name, "ret") || + !strcmp(name, "retl") || + !strcmp(name, "return")) { + ops = &ret_ops; + } else if (!strcmp(name, "mov")) { + ops = &mov_ops; + } else { + if (name[0] == 'c' && + (name[1] == 'w' || name[1] == 'x')) + name += 2; + + if (name[0] == 'b') { + const char *cond = name + 1; + + if (cond[0] == 'r') { + if (is_branch_reg_cond(cond + 1)) + ops = &jump_ops; + } else if (is_branch_cond(cond)) { + ops = &jump_ops; + } + } else if (name[0] == 'f' && name[1] == 'b') { + if (is_branch_float_cond(name + 2)) + ops = &jump_ops; + } + } + + if (ops) + arch__associate_ins_ops(arch, name, ops); + + return ops; +} + +static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) +{ + if (!arch->initialized) { + arch->initialized = true; + arch->associate_instruction_ops = sparc__associate_instruction_ops; + arch->objdump.comment_char = '#'; + } + + return 0; +} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 1a38e78117ce..8cc6642fce7a 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -19,9 +19,6 @@ systbl := $(sys)/syscalltbl.sh _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') $(header): $(sys)/syscall_64.tbl $(systbl) - @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ - (diff -B arch/x86/entry/syscalls/syscall_64.tbl ../../arch/x86/entry/syscalls/syscall_64.tbl >/dev/null) \ - || echo "Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl'" >&2 )) || true $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 4dfe42666d0c..f0b1709a5ffb 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -341,6 +341,8 @@ 330 common pkey_alloc __x64_sys_pkey_alloc 331 common pkey_free __x64_sys_pkey_free 332 common statx __x64_sys_statx +333 common io_pgetevents __x64_sys_io_pgetevents +334 common rseq __x64_sys_rseq # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index c1bd979b957b..613709cfbbd0 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -9,6 +9,7 @@ struct test; int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); +int test__bp_modify(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 8e2c5a38c3b9..586849ff83a0 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -5,3 +5,4 @@ libperf-y += arch-tests.o libperf-y += rdpmc.o libperf-y += perf-time-to-tsc.o libperf-$(CONFIG_AUXTRACE) += insn-x86.o +libperf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index cc1802ff5410..d47d3f8e3c8e 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -24,6 +24,12 @@ struct test arch_tests[] = { .func = test__insn_x86, }, #endif +#if defined(__x86_64__) + { + .desc = "x86 bp modify", + .func = test__bp_modify, + }, +#endif { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c new file mode 100644 index 000000000000..f53e4406709f --- /dev/null +++ b/tools/perf/arch/x86/tests/bp-modify.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/user.h> +#include <syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ptrace.h> +#include <asm/ptrace.h> +#include <errno.h> +#include "debug.h" +#include "tests/tests.h" +#include "arch-tests.h" + +static noinline int bp_1(void) +{ + pr_debug("in %s\n", __func__); + return 0; +} + +static noinline int bp_2(void) +{ + pr_debug("in %s\n", __func__); + return 0; +} + +static int spawn_child(void) +{ + int child = fork(); + + if (child == 0) { + /* + * The child sets itself for as tracee and + * waits in signal for parent to trace it, + * then it calls bp_1 and quits. + */ + int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL); + + if (err) { + pr_debug("failed to PTRACE_TRACEME\n"); + exit(1); + } + + raise(SIGCONT); + bp_1(); + exit(0); + } + + return child; +} + +/* + * This tests creates HW breakpoint, tries to + * change it and checks it was properly changed. + */ +static int bp_modify1(void) +{ + pid_t child; + int status; + unsigned long rip = 0, dr7 = 1; + + child = spawn_child(); + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 1\n"); + return TEST_FAIL; + } + + /* + * The parent does following steps: + * - creates a new breakpoint (id 0) for bp_2 function + * - changes that breakponit to bp_1 function + * - waits for the breakpoint to hit and checks + * it has proper rip of bp_1 function + * - detaches the child + */ + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_2)) { + pr_debug("failed to set breakpoint, 1st time: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_1)) { + pr_debug("failed to set breakpoint, 2nd time: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[7]), dr7)) { + pr_debug("failed to set dr7: %s\n", strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_CONT, child, NULL, NULL)) { + pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno)); + goto out; + } + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 2\n"); + return TEST_FAIL; + } + + rip = ptrace(PTRACE_PEEKUSER, child, + offsetof(struct user_regs_struct, rip), NULL); + if (rip == (unsigned long) -1) { + pr_debug("failed to PTRACE_PEEKUSER: %s\n", + strerror(errno)); + goto out; + } + + pr_debug("rip %lx, bp_1 %p\n", rip, bp_1); + +out: + if (ptrace(PTRACE_DETACH, child, NULL, NULL)) { + pr_debug("failed to PTRACE_DETACH: %s", strerror(errno)); + return TEST_FAIL; + } + + return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; +} + +/* + * This tests creates HW breakpoint, tries to + * change it to bogus value and checks the original + * breakpoint is hit. + */ +static int bp_modify2(void) +{ + pid_t child; + int status; + unsigned long rip = 0, dr7 = 1; + + child = spawn_child(); + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 1\n"); + return TEST_FAIL; + } + + /* + * The parent does following steps: + * - creates a new breakpoint (id 0) for bp_1 function + * - tries to change that breakpoint to (-1) address + * - waits for the breakpoint to hit and checks + * it has proper rip of bp_1 function + * - detaches the child + */ + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), bp_1)) { + pr_debug("failed to set breakpoint: %s\n", + strerror(errno)); + goto out; + } + + if (ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[7]), dr7)) { + pr_debug("failed to set dr7: %s\n", strerror(errno)); + goto out; + } + + if (!ptrace(PTRACE_POKEUSER, child, + offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) { + pr_debug("failed, breakpoint set to bogus address\n"); + goto out; + } + + if (ptrace(PTRACE_CONT, child, NULL, NULL)) { + pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno)); + goto out; + } + + waitpid(child, &status, 0); + if (WIFEXITED(status)) { + pr_debug("tracee exited prematurely 2\n"); + return TEST_FAIL; + } + + rip = ptrace(PTRACE_PEEKUSER, child, + offsetof(struct user_regs_struct, rip), NULL); + if (rip == (unsigned long) -1) { + pr_debug("failed to PTRACE_PEEKUSER: %s\n", + strerror(errno)); + goto out; + } + + pr_debug("rip %lx, bp_1 %p\n", rip, bp_1); + +out: + if (ptrace(PTRACE_DETACH, child, NULL, NULL)) { + pr_debug("failed to PTRACE_DETACH: %s", strerror(errno)); + return TEST_FAIL; + } + + return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL; +} + +int test__bp_modify(struct test *test __maybe_unused, + int subtest __maybe_unused) +{ + TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1()); + TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2()); + + return 0; +} diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 95036c7a59e8..7879df34569a 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c @@ -26,7 +26,7 @@ static int sample_ustack(struct perf_sample *sample, sp = (unsigned long) regs[PERF_REG_X86_SP]; - map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + map = map_groups__find(thread->mg, (u64)sp); if (!map) { pr_debug("failed to get stack map\n"); free(buf); diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index f95e6f46ef0d..844b8f335532 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -4,6 +4,8 @@ libperf-y += pmu.o libperf-y += kvm-stat.o libperf-y += perf_regs.o libperf-y += group.o +libperf-y += machine.o +libperf-y += event.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c new file mode 100644 index 000000000000..675a0213044d --- /dev/null +++ b/tools/perf/arch/x86/util/event.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> + +#include "../../util/machine.h" +#include "../../util/tool.h" +#include "../../util/map.h" +#include "../../util/util.h" +#include "../../util/debug.h" + +#if defined(__x86_64__) + +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int rc = 0; + struct map *pos; + struct map_groups *kmaps = &machine->kmaps; + struct maps *maps = &kmaps->maps; + union perf_event *event = zalloc(sizeof(event->mmap) + + machine->id_hdr_size); + + if (!event) { + pr_debug("Not enough memory synthesizing mmap event " + "for extra kernel maps\n"); + return -1; + } + + for (pos = maps__first(maps); pos; pos = map__next(pos)) { + struct kmap *kmap; + size_t size; + + if (!__map__is_extra_kernel_map(pos)) + continue; + + kmap = map__kmap(pos); + + size = sizeof(event->mmap) - sizeof(event->mmap.filename) + + PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) + + machine->id_hdr_size; + + memset(event, 0, size); + + event->mmap.header.type = PERF_RECORD_MMAP; + + /* + * kernel uses 0 for user space maps, see kernel/perf_event.c + * __perf_event_mmap + */ + if (machine__is_host(machine)) + event->header.misc = PERF_RECORD_MISC_KERNEL; + else + event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; + + event->mmap.header.size = size; + + event->mmap.start = pos->start; + event->mmap.len = pos->end - pos->start; + event->mmap.pgoff = pos->pgoff; + event->mmap.pid = machine->pid; + + strlcpy(event->mmap.filename, kmap->name, PATH_MAX); + + if (perf_tool__process_synth_event(tool, event, machine, + process) != 0) { + rc = -1; + break; + } + } + + free(event); + return rc; +} + +#endif diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c new file mode 100644 index 000000000000..4520ac53caa9 --- /dev/null +++ b/tools/perf/arch/x86/util/machine.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/types.h> +#include <linux/string.h> +#include <stdlib.h> + +#include "../../util/machine.h" +#include "../../util/map.h" +#include "../../util/symbol.h" +#include "../../util/sane_ctype.h" + +#include <symbol/kallsyms.h> + +#if defined(__x86_64__) + +struct extra_kernel_map_info { + int cnt; + int max_cnt; + struct extra_kernel_map *maps; + bool get_entry_trampolines; + u64 entry_trampoline; +}; + +static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start, + u64 end, u64 pgoff, const char *name) +{ + if (mi->cnt >= mi->max_cnt) { + void *buf; + size_t sz; + + mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32; + sz = sizeof(struct extra_kernel_map) * mi->max_cnt; + buf = realloc(mi->maps, sz); + if (!buf) + return -1; + mi->maps = buf; + } + + mi->maps[mi->cnt].start = start; + mi->maps[mi->cnt].end = end; + mi->maps[mi->cnt].pgoff = pgoff; + strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN); + + mi->cnt += 1; + + return 0; +} + +static int find_extra_kernel_maps(void *arg, const char *name, char type, + u64 start) +{ + struct extra_kernel_map_info *mi = arg; + + if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL && + !strcmp(name, "_entry_trampoline")) { + mi->entry_trampoline = start; + return 0; + } + + if (is_entry_trampoline(name)) { + u64 end = start + page_size; + + return add_extra_kernel_map(mi, start, end, 0, name); + } + + return 0; +} + +int machine__create_extra_kernel_maps(struct machine *machine, + struct dso *kernel) +{ + struct extra_kernel_map_info mi = { .cnt = 0, }; + char filename[PATH_MAX]; + int ret; + int i; + + machine__get_kallsyms_filename(machine, filename, PATH_MAX); + + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return 0; + + ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps); + if (ret) + goto out_free; + + if (!mi.entry_trampoline) + goto out_free; + + for (i = 0; i < mi.cnt; i++) { + struct extra_kernel_map *xm = &mi.maps[i]; + + xm->pgoff = mi.entry_trampoline; + ret = machine__create_extra_kernel_map(machine, kernel, xm); + if (ret) + goto out_free; + } + + machine->trampolines_mapped = mi.cnt; +out_free: + free(mi.maps); + return ret; +} + +#endif diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index 4b2caf6d48e7..fead6b3b4206 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -226,7 +226,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op) else if (rm[2].rm_so != rm[2].rm_eo) prefix[0] = '+'; else - strncpy(prefix, "+0", 2); + scnprintf(prefix, sizeof(prefix), "+0"); } /* Rename register */ diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index 63a74c32ddc5..e33ef5bc31c5 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> +#include <linux/stddef.h> #include <linux/perf_event.h> #include "../../util/intel-pt.h" diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 06bae7023a51..950539f9a4f7 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -2,6 +2,7 @@ #include <stdbool.h> #include <errno.h> +#include <linux/stddef.h> #include <linux/perf_event.h> #include "../../perf.h" diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 60bf11943047..eafce1a130a1 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -7,6 +7,7 @@ perf-y += futex-wake-parallel.o perf-y += futex-requeue.o perf-y += futex-lock-pi.o +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index b43f8d2a34ec..9ad015a1e202 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -6,6 +6,7 @@ #define altinstr_replacement text #define globl p2align 4; .globl #define _ASM_EXTABLE_FAULT(x, y) +#define _ASM_EXTABLE(x, y) #include "../../arch/x86/lib/memcpy_64.S" /* diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c new file mode 100644 index 000000000000..4130734dde84 --- /dev/null +++ b/tools/perf/bench/mem-memcpy-x86-64-lib.c @@ -0,0 +1,24 @@ +/* + * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy + * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy' + * happy. + */ +#include <linux/types.h> + +unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt); +unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len); + +unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) { + /* + * Call the assembly routine back directly since + * memcpy_mcsafe() may silently fallback to memcpy. + */ + unsigned long rem = __memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 63eb49082774..44195514b19e 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1098,7 +1098,7 @@ static void *worker_thread(void *__tdata) u8 *global_data; u8 *process_data; u8 *thread_data; - u64 bytes_done; + u64 bytes_done, secs; long work_done; u32 l; struct rusage rusage; @@ -1254,7 +1254,8 @@ static void *worker_thread(void *__tdata) timersub(&stop, &start0, &diff); td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; - td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; + secs = td->runtime_ns / NSEC_PER_SEC; + td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; getrusage(RUSAGE_THREAD, &rusage); td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 51709a961496..93d679eaf1f4 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -40,11 +40,11 @@ struct perf_annotate { struct perf_tool tool; struct perf_session *session; + struct annotation_options opts; bool use_tui, use_stdio, use_stdio2, use_gtk; - bool full_paths; - bool print_line; bool skip_missing; bool has_br_stack; + bool group_set; const char *sym_hist_filter; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -161,12 +161,12 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, hist__account_cycles(sample->branch_stack, al, sample, false); bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); out: return err; @@ -228,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, */ if (al->sym != NULL) { rb_erase(&al->sym->rb_node, - &al->map->dso->symbols[al->map->type]); + &al->map->dso->symbols); symbol__delete(al->sym); dso__reset_find_symbol_cache(al->map->dso); } @@ -248,7 +248,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, if (he == NULL) return -ENOMEM; - ret = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); + ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); hists__inc_nr_samples(hists, true); return ret; } @@ -283,15 +283,22 @@ out_put: return ret; } +static int process_feature_event(struct perf_session *session, + union perf_event *event) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(session, event); + return 0; +} + static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct perf_annotate *ann) { if (!ann->use_stdio2) - return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, - ann->print_line, ann->full_paths, 0, 0); - return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, - ann->print_line, ann->full_paths); + return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts); + + return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts); } static void hists__find_annotations(struct hists *hists, @@ -342,7 +349,7 @@ find_next: /* skip missing symbols */ nd = rb_next(nd); } else if (use_browser == 1) { - key = hist_entry__tui_annotate(he, evsel, NULL); + key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts); switch (key) { case -1: @@ -389,8 +396,9 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - if (!objdump_path) { - ret = perf_env__lookup_objdump(&session->header.env); + if (!ann->opts.objdump_path) { + ret = perf_env__lookup_objdump(&session->header.env, + &ann->opts.objdump_path); if (ret) goto out; } @@ -471,10 +479,11 @@ int cmd_annotate(int argc, const char **argv) .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, .tracing_data = perf_event__process_tracing_data, - .feature = perf_event__process_feature, + .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, }, + .opts = annotation__default_options, }; struct perf_data data = { .mode = PERF_DATA_MODE_READ, @@ -502,23 +511,26 @@ int cmd_annotate(int argc, const char **argv) "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), - OPT_BOOLEAN('l', "print-line", &annotate.print_line, + OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines, "print matching source lines (may be slow)"), - OPT_BOOLEAN('P', "full-paths", &annotate.full_paths, + OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path, "Don't shorten the displayed pathnames"), OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing, "Skip symbols that cannot be annotated"), + OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, + &annotate.group_set, + "Show event group information together"), OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"), OPT_CALLBACK(0, "symfs", NULL, "directory", "Look for files with symbols relative to this directory", symbol__config_symfs), - OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), - OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), - OPT_STRING(0, "objdump", &objdump_path, "path", + OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), @@ -529,6 +541,10 @@ int cmd_annotate(int argc, const char **argv) OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode", "'always' (default), 'never' or 'auto' only applicable to --stdio mode", stdio__config_color, "always"), + OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period", + "Set percent type local/global-period/hits", + annotate_parse_percent_type), + OPT_END() }; int ret; @@ -570,6 +586,9 @@ int cmd_annotate(int argc, const char **argv) annotate.has_br_stack = perf_header__has_feat(&annotate.session->header, HEADER_BRANCH_STACK); + if (annotate.group_set) + perf_evlist__force_leader(annotate.session->evlist); + ret = symbol__annotation_init(); if (ret < 0) goto out_delete; diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 41db2cba77eb..115110a4796a 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -25,6 +25,7 @@ #include "util/session.h" #include "util/symbol.h" #include "util/time-utils.h" +#include "util/probe-file.h" static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) { @@ -239,6 +240,34 @@ out: return err; } +static int build_id_cache__purge_all(void) +{ + struct strlist *list; + struct str_node *pos; + int err = 0; + char *buf; + + list = build_id_cache__list_all(false); + if (!list) { + pr_debug("Failed to get buildids: -%d\n", errno); + return -EINVAL; + } + + strlist__for_each_entry(pos, list) { + buf = build_id_cache__origname(pos->s); + err = build_id_cache__remove_s(pos->s); + pr_debug("Removing %s (%s): %s\n", buf, pos->s, + err ? "FAIL" : "Ok"); + free(buf); + if (err) + break; + } + strlist__delete(list); + + pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok"); + return err; +} + static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused) { char filename[PATH_MAX]; @@ -297,6 +326,26 @@ static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi) return err; } +static int build_id_cache__show_all(void) +{ + struct strlist *bidlist; + struct str_node *nd; + char *buf; + + bidlist = build_id_cache__list_all(true); + if (!bidlist) { + pr_debug("Failed to get buildids: -%d\n", errno); + return -1; + } + strlist__for_each_entry(nd, bidlist) { + buf = build_id_cache__origname(nd->s); + fprintf(stdout, "%s %s\n", nd->s, buf); + free(buf); + } + strlist__delete(bidlist); + return 0; +} + int cmd_buildid_cache(int argc, const char **argv) { struct strlist *list; @@ -304,6 +353,9 @@ int cmd_buildid_cache(int argc, const char **argv) int ret = 0; int ns_id = -1; bool force = false; + bool list_files = false; + bool opts_flag = false; + bool purge_all = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, *purge_name_list_str = NULL, @@ -327,6 +379,8 @@ int cmd_buildid_cache(int argc, const char **argv) "file(s) to remove"), OPT_STRING('p', "purge", &purge_name_list_str, "file list", "file(s) to remove (remove old caches too)"), + OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"), + OPT_BOOLEAN('l', "list", &list_files, "list all cached files"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -344,11 +398,20 @@ int cmd_buildid_cache(int argc, const char **argv) argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); - if (argc || (!add_name_list_str && !kcore_filename && - !remove_name_list_str && !purge_name_list_str && - !missing_filename && !update_name_list_str)) + opts_flag = add_name_list_str || kcore_filename || + remove_name_list_str || purge_name_list_str || + missing_filename || update_name_list_str || + purge_all; + + if (argc || !(list_files || opts_flag)) usage_with_options(buildid_cache_usage, buildid_cache_options); + /* -l is exclusive. It can not be used with other options. */ + if (list_files && opts_flag) { + usage_with_options_msg(buildid_cache_usage, + buildid_cache_options, "-l is exclusive.\n"); + } + if (ns_id > 0) nsi = nsinfo__new(ns_id); @@ -366,6 +429,11 @@ int cmd_buildid_cache(int argc, const char **argv) setup_pager(); + if (list_files) { + ret = build_id_cache__show_all(); + goto out; + } + if (add_name_list_str) { list = strlist__new(add_name_list_str, NULL); if (list) { @@ -420,6 +488,13 @@ int cmd_buildid_cache(int argc, const char **argv) } } + if (purge_all) { + if (build_id_cache__purge_all()) { + pr_warning("Couldn't remove some caches. Error: %s.\n", + str_error_r(errno, sbuf, sizeof(sbuf))); + } + } + if (missing_filename) ret = build_id_cache__fprintf_missing(session, stdout); diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2126bfbcb385..f3aa9d02a5ab 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -56,16 +56,16 @@ struct c2c_hist_entry { struct compute_stats cstats; + unsigned long paddr; + unsigned long paddr_cnt; + bool paddr_zero; + char *nodestr; + /* * must be at the end, * because of its callchain dynamic entry */ struct hist_entry he; - - unsigned long paddr; - unsigned long paddr_cnt; - bool paddr_zero; - char *nodestr; }; static char const *coalesce_default = "pid,iaddr"; @@ -1976,7 +1976,7 @@ static int filter_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); if (c2c.show_src && !he->srcline) - he->srcline = hist_entry__get_srcline(he); + he->srcline = hist_entry__srcline(he); calc_width(c2c_he); @@ -2193,7 +2193,7 @@ static void print_cacheline(struct c2c_hists *c2c_hists, fprintf(out, "%s\n", bf); fprintf(out, " -------------------------------------------------------------\n"); - hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true); + hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, false); } static void print_pareto(FILE *out) @@ -2268,7 +2268,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) fprintf(out, "=================================================\n"); fprintf(out, "#\n"); - hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, false); + hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, true); fprintf(out, "\n"); fprintf(out, "=================================================\n"); @@ -2349,6 +2349,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) " s Toggle full length of symbol and source line columns \n" " q Return back to cacheline list \n"; + if (!he) + return 0; + /* Display compact version first. */ c2c.symbol_full = false; diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index d660cb7b222b..39db2ee32d48 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -696,7 +696,7 @@ static void hists__process(struct hists *hists) hists__output_resort(hists, NULL); hists__fprintf(hists, !quiet, 0, 0, 0, stdout, - symbol_conf.use_callchain); + !symbol_conf.use_callchain); } static void data__fprintf(void) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 40fe919bbcf3..eda41673c4f3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -86,12 +86,10 @@ static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused, } #endif -static int perf_event__repipe_op2_synth(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session - __maybe_unused) +static int perf_event__repipe_op2_synth(struct perf_session *session, + union perf_event *event) { - return perf_event__repipe_synth(tool, event); + return perf_event__repipe_synth(session->tool, event); } static int perf_event__repipe_attr(struct perf_tool *tool, @@ -133,10 +131,10 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size) return 0; } -static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session) +static s64 perf_event__repipe_auxtrace(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct perf_inject *inject = container_of(tool, struct perf_inject, tool); int ret; @@ -174,9 +172,8 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, #else static s64 -perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { pr_err("AUX area tracing not supported\n"); return -EINVAL; @@ -362,26 +359,24 @@ static int perf_event__repipe_exit(struct perf_tool *tool, return err; } -static int perf_event__repipe_tracing_data(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session) +static int perf_event__repipe_tracing_data(struct perf_session *session, + union perf_event *event) { int err; - perf_event__repipe_synth(tool, event); - err = perf_event__process_tracing_data(tool, event, session); + perf_event__repipe_synth(session->tool, event); + err = perf_event__process_tracing_data(session, event); return err; } -static int perf_event__repipe_id_index(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session) +static int perf_event__repipe_id_index(struct perf_session *session, + union perf_event *event) { int err; - perf_event__repipe_synth(tool, event); - err = perf_event__process_id_index(tool, event, session); + perf_event__repipe_synth(session->tool, event); + err = perf_event__process_id_index(session, event); return err; } @@ -440,9 +435,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, goto repipe; } - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - - if (al.map != NULL) { + if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) { if (!al.map->dso->hit) { al.map->dso->hit = 1; if (map__load(al.map) >= 0) { @@ -805,7 +798,8 @@ int cmd_inject(int argc, const char **argv) "kallsyms pathname"), OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"), OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, - NULL, "opts", "Instruction Tracing options", + NULL, "opts", "Instruction Tracing options\n" + ITRACE_HELP, itrace_parse_synth_opts), OPT_BOOLEAN(0, "strip", &inject.strip, "strip non-synthesized events (use with --itrace)"), diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index bcfb363112d3..90d1a2305b72 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c @@ -27,7 +27,7 @@ static int __cmd_kallsyms(int argc, const char **argv) for (i = 0; i < argc; ++i) { struct map *map; - struct symbol *symbol = machine__find_kernel_function_by_name(machine, argv[i], &map); + struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map); if (symbol == NULL) { printf("%s: not found\n", argv[i]); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index ae11e4c3516a..b63bca4b0c2a 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -729,7 +729,7 @@ static char *compact_gfp_string(unsigned long gfp_flags) static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample, unsigned int gfp_flags) { - struct pevent_record record = { + struct tep_record record = { .cpu = sample->cpu, .data = sample->raw_data, .size = sample->raw_size, @@ -747,7 +747,7 @@ static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample, } trace_seq_init(&seq); - pevent_event_info(&seq, evsel->tp_format, &record); + tep_event_info(&seq, evsel->tp_format, &record); str = strtok_r(seq.buffer, " ", &pos); while (str) { @@ -1004,7 +1004,7 @@ static void __print_slab_result(struct rb_root *root, if (is_caller) { addr = data->call_site; if (!raw_ip) - sym = machine__find_kernel_function(machine, addr, &map); + sym = machine__find_kernel_symbol(machine, addr, &map); } else addr = data->ptr; @@ -1068,7 +1068,7 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines) char *caller = buf; data = rb_entry(next, struct page_stat, node); - sym = machine__find_kernel_function(machine, data->callsite, &map); + sym = machine__find_kernel_symbol(machine, data->callsite, &map); if (sym) caller = sym->name; else @@ -1110,7 +1110,7 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines char *caller = buf; data = rb_entry(next, struct page_stat, node); - sym = machine__find_kernel_function(machine, data->callsite, &map); + sym = machine__find_kernel_symbol(machine, data->callsite, &map); if (sym) caller = sym->name; else @@ -1974,7 +1974,7 @@ int cmd_kmem(int argc, const char **argv) goto out_delete; } - kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); + kmem_page_size = tep_get_page_size(evsel->tp_format->pevent); symbol_conf.use_callchain = true; } diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 72e2ca096bf5..2b1ef704169f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1438,8 +1438,6 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, goto out; } - symbol_conf.nr_events = kvm->evlist->nr_entries; - if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0) usage_with_options(live_usage, live_options); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index c0065923a525..99de91698de1 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -81,8 +81,7 @@ static int parse_probe_event(const char *str) params.target_used = true; } - if (params.nsi) - pev->nsi = nsinfo__get(params.nsi); + pev->nsi = nsinfo__get(params.nsi); /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 22ebeb92ac51..488779bc4c8d 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -106,9 +106,12 @@ static bool switch_output_time(struct record *rec) trigger_is_ready(&switch_output_trigger); } -static int record__write(struct record *rec, void *bf, size_t size) +static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused, + void *bf, size_t size) { - if (perf_data__write(rec->session->data, bf, size) < 0) { + struct perf_data_file *file = &rec->session->data->file; + + if (perf_data_file__write(file, bf, size) < 0) { pr_err("failed to write perf data, error: %m\n"); return -1; } @@ -127,15 +130,15 @@ static int process_synthesized_event(struct perf_tool *tool, struct machine *machine __maybe_unused) { struct record *rec = container_of(tool, struct record, tool); - return record__write(rec, event, event->header.size); + return record__write(rec, NULL, event, event->header.size); } -static int record__pushfn(void *to, void *bf, size_t size) +static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size) { struct record *rec = to; rec->samples++; - return record__write(rec, bf, size); + return record__write(rec, map, bf, size); } static volatile int done; @@ -170,6 +173,7 @@ static void record__sig_exit(void) #ifdef HAVE_AUXTRACE_SUPPORT static int record__process_auxtrace(struct perf_tool *tool, + struct perf_mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2) { @@ -197,21 +201,21 @@ static int record__process_auxtrace(struct perf_tool *tool, if (padding) padding = 8 - padding; - record__write(rec, event, event->header.size); - record__write(rec, data1, len1); + record__write(rec, map, event, event->header.size); + record__write(rec, map, data1, len1); if (len2) - record__write(rec, data2, len2); - record__write(rec, &pad, padding); + record__write(rec, map, data2, len2); + record__write(rec, map, &pad, padding); return 0; } static int record__auxtrace_mmap_read(struct record *rec, - struct auxtrace_mmap *mm) + struct perf_mmap *map) { int ret; - ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool, + ret = auxtrace_mmap__read(map, rec->itr, &rec->tool, record__process_auxtrace); if (ret < 0) return ret; @@ -223,11 +227,11 @@ static int record__auxtrace_mmap_read(struct record *rec, } static int record__auxtrace_mmap_read_snapshot(struct record *rec, - struct auxtrace_mmap *mm) + struct perf_mmap *map) { int ret; - ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool, + ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool, record__process_auxtrace, rec->opts.auxtrace_snapshot_size); if (ret < 0) @@ -245,13 +249,12 @@ static int record__auxtrace_read_snapshot_all(struct record *rec) int rc = 0; for (i = 0; i < rec->evlist->nr_mmaps; i++) { - struct auxtrace_mmap *mm = - &rec->evlist->mmap[i].auxtrace_mmap; + struct perf_mmap *map = &rec->evlist->mmap[i]; - if (!mm->base) + if (!map->auxtrace_mmap.base) continue; - if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) { + if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) { rc = -1; goto out; } @@ -295,7 +298,7 @@ static int record__auxtrace_init(struct record *rec) static inline int record__auxtrace_mmap_read(struct record *rec __maybe_unused, - struct auxtrace_mmap *mm __maybe_unused) + struct perf_mmap *map __maybe_unused) { return 0; } @@ -388,7 +391,12 @@ try_again: ui__warning("%s\n", msg); goto try_again; } - + if ((errno == EINVAL || errno == EBADF) && + pos->leader != pos && + pos->weak_group) { + pos = perf_evlist__reset_weak_group(evlist, pos); + goto try_again; + } rc = -errno; perf_evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg)); @@ -529,17 +537,17 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; + struct perf_mmap *map = &maps[i]; - if (maps[i].base) { - if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) { + if (map->base) { + if (perf_mmap__push(map, rec, record__pushfn) != 0) { rc = -1; goto out; } } - if (mm->base && !rec->opts.auxtrace_snapshot_mode && - record__auxtrace_mmap_read(rec, mm) != 0) { + if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode && + record__auxtrace_mmap_read(rec, map) != 0) { rc = -1; goto out; } @@ -550,7 +558,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli * at least one event. */ if (bytes_written != rec->bytes_written) - rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); + rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event)); if (overwrite) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); @@ -589,6 +597,9 @@ static void record__init_features(struct record *rec) if (!rec->opts.full_auxtrace) perf_header__clear_feat(&session->header, HEADER_AUXTRACE); + if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) + perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_STAT); } @@ -758,7 +769,7 @@ static int record__synthesize(struct record *rec, bool tail) * We need to synthesize events first, because some * features works on top of them (on report side). */ - err = perf_event__synthesize_attrs(tool, session, + err = perf_event__synthesize_attrs(tool, rec->evlist, process_synthesized_event); if (err < 0) { pr_err("Couldn't synthesize attrs.\n"); @@ -894,6 +905,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) record__init_features(rec); + if (rec->opts.use_clockid && rec->opts.clockid_res_ns) + session->header.env.clockid_res_ns = rec->opts.clockid_res_ns; + if (forks) { err = perf_evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe, @@ -1334,6 +1348,19 @@ static const struct clockid_map clockids[] = { CLOCKID_END, }; +static int get_clockid_res(clockid_t clk_id, u64 *res_ns) +{ + struct timespec res; + + *res_ns = 0; + if (!clock_getres(clk_id, &res)) + *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC; + else + pr_warning("WARNING: Failed to determine specified clock resolution.\n"); + + return 0; +} + static int parse_clockid(const struct option *opt, const char *str, int unset) { struct record_opts *opts = (struct record_opts *)opt->value; @@ -1357,7 +1384,7 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) /* if its a number, we're done */ if (sscanf(str, "%d", &opts->clockid) == 1) - return 0; + return get_clockid_res(opts->clockid, &opts->clockid_res_ns); /* allow a "CLOCK_" prefix to the name */ if (!strncasecmp(str, "CLOCK_", 6)) @@ -1366,7 +1393,8 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) for (cm = clockids; cm->name; cm++) { if (!strcasecmp(str, cm->name)) { opts->clockid = cm->clockid; - return 0; + return get_clockid_res(opts->clockid, + &opts->clockid_res_ns); } } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0f198f6d9b77..257c9c18cb7e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -71,6 +71,7 @@ struct report { bool group_set; int max_stack; struct perf_read_values show_threads_values; + struct annotation_options annotation_opts; const char *pretty_printing_style; const char *cpu_list; const char *symbol_filter_str; @@ -136,26 +137,25 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, if (sort__mode == SORT_MODE__BRANCH) { bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); } else if (rep->mem_mode) { mi = he->mem_info; - err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel); if (err) goto out; - err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); } else if (symbol_conf.cumulate_callchain) { if (single) - err = hist_entry__inc_addr_samples(he, sample, evsel->idx, - al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); } else { - err = hist_entry__inc_addr_samples(he, sample, evsel->idx, al->addr); + err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr); } out: @@ -181,11 +181,11 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, rep->nonany_branch_mode); bi = he->branch_info; - err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); if (err) goto out; - err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel); branch_type_count(&rep->brtype_stat, &bi->flags, bi->from.addr, bi->to.addr); @@ -194,30 +194,20 @@ out: return err; } -/* - * Events in data file are not collect in groups, but we still want - * the group display. Set the artificial group and set the leader's - * forced_leader flag to notify the display code. - */ static void setup_forced_leader(struct report *report, struct perf_evlist *evlist) { - if (report->group_set && !evlist->nr_groups) { - struct perf_evsel *leader = perf_evlist__first(evlist); - - perf_evlist__set_leader(evlist); - leader->forced_leader = true; - } + if (report->group_set) + perf_evlist__force_leader(evlist); } -static int process_feature_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +static int process_feature_event(struct perf_session *session, + union perf_event *event) { - struct report *rep = container_of(tool, struct report, tool); + struct report *rep = container_of(session->tool, struct report, tool); if (event->feat.feat_id < HEADER_LAST_FEATURE) - return perf_event__process_feature(tool, event, session); + return perf_event__process_feature(session, event); if (event->feat.feat_id != HEADER_LAST_FEATURE) { pr_err("failed: wrong feature ID: %" PRIu64 "\n", @@ -226,7 +216,8 @@ static int process_feature_event(struct perf_tool *tool, } /* - * All features are received, we can force the + * (feat_id = HEADER_LAST_FEATURE) is the end marker which + * means all features are received, now we can force the * group if needed. */ setup_forced_leader(rep, session->evlist); @@ -486,8 +477,8 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, hists__fprintf_nr_sample_events(hists, rep, evname, stdout); hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout, - symbol_conf.use_callchain || - symbol_conf.show_branchflag_count); + !(symbol_conf.use_callchain || + symbol_conf.show_branchflag_count)); fprintf(stdout, "\n\n"); } @@ -523,12 +514,9 @@ static void report__warn_kptr_restrict(const struct report *rep) "As no suitable kallsyms nor vmlinux was found, kernel samples\n" "can't be resolved."; - if (kernel_map) { - const struct dso *kdso = kernel_map->dso; - if (!RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION])) { - desc = "If some relocation was applied (e.g. " - "kexec) symbols may be misresolved."; - } + if (kernel_map && map__has_symbols(kernel_map)) { + desc = "If some relocation was applied (e.g. " + "kexec) symbols may be misresolved."; } ui__warning( @@ -573,7 +561,7 @@ static int report__browse_hists(struct report *rep) ret = perf_evlist__tui_browse_hists(evlist, help, NULL, rep->min_percent, &session->header.env, - true); + true, &rep->annotation_opts); /* * Usually "ret" is the last pressed key, and we only * care if the key notifies us to switch data file. @@ -718,10 +706,7 @@ static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) { - int printed = 0, i; - for (i = 0; i < MAP__NR_TYPES; ++i) - printed += maps__fprintf_task(&mg->maps[i], indent, fp); - return printed; + return maps__fprintf_task(&mg->maps, indent, fp); } static void task__print_level(struct task *task, FILE *fp, int level) @@ -961,12 +946,6 @@ parse_percent_limit(const struct option *opt, const char *str, return 0; } -#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" - -const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" - CALLCHAIN_REPORT_HELP - "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; - int cmd_report(int argc, const char **argv) { struct perf_session *session; @@ -975,6 +954,10 @@ int cmd_report(int argc, const char **argv) bool has_br_stack = false; int branch_mode = -1; bool branch_call_mode = false; +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" + const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { "perf report [<options>]", @@ -997,6 +980,7 @@ int cmd_report(int argc, const char **argv) .id_index = perf_event__process_id_index, .auxtrace_info = perf_event__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, + .event_update = perf_event__process_event_update, .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, @@ -1004,6 +988,7 @@ int cmd_report(int argc, const char **argv) .max_stack = PERF_MAX_STACK_DEPTH, .pretty_printing_style = "normal", .socket_filter = -1, + .annotation_opts = annotation__default_options, }; const struct option options[] = { OPT_STRING('i', "input", &input_name, "file", @@ -1093,11 +1078,11 @@ int cmd_report(int argc, const char **argv) "list of cpus to profile"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, "Display extended information about perf.data file"), - OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), - OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, "Show a column with the sum of periods"), @@ -1108,7 +1093,7 @@ int cmd_report(int argc, const char **argv) parse_branch_mode), OPT_BOOLEAN(0, "branch-history", &branch_call_mode, "add last branch records to call history"), - OPT_STRING(0, "objdump", &objdump_path, "path", + OPT_STRING(0, "objdump", &report.annotation_opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, "Disable symbol demangling"), @@ -1120,7 +1105,7 @@ int cmd_report(int argc, const char **argv) OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", "how to display percentage of filtered entries", parse_filter_percentage), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", - "Instruction Tracing options", + "Instruction Tracing options\n" ITRACE_HELP, itrace_parse_synth_opts), OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, "Show full source file name path for source lines"), @@ -1139,6 +1124,9 @@ int cmd_report(int argc, const char **argv) "Time span of interest (start,stop)"), OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, "Show inline function"), + OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", + "Set percent type local/global-period/hits", + annotate_parse_percent_type), OPT_END() }; struct perf_data data = { @@ -1381,9 +1369,9 @@ repeat: } if (session->tevent.pevent && - pevent_set_function_resolver(session->tevent.pevent, - machine__resolve_kernel_addr, - &session->machines.host) < 0) { + tep_set_function_resolver(session->tevent.pevent, + machine__resolve_kernel_addr, + &session->machines.host) < 0) { pr_err("%s: failed to set libtraceevent function resolver\n", __func__); return -1; diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 4dfdee668b0c..cbf39dab19c1 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -2143,7 +2143,7 @@ static void save_task_callchain(struct perf_sched *sched, return; } - if (!symbol_conf.use_callchain || sample->callchain == NULL) + if (!sched->show_callchain || sample->callchain == NULL) return; if (thread__resolve_callchain(thread, cursor, evsel, sample, @@ -2271,10 +2271,11 @@ static struct thread *get_idle_thread(int cpu) return idle_threads[cpu]; } -static void save_idle_callchain(struct idle_thread_runtime *itr, +static void save_idle_callchain(struct perf_sched *sched, + struct idle_thread_runtime *itr, struct perf_sample *sample) { - if (!symbol_conf.use_callchain || sample->callchain == NULL) + if (!sched->show_callchain || sample->callchain == NULL) return; callchain_cursor__copy(&itr->cursor, &callchain_cursor); @@ -2320,7 +2321,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched, /* copy task callchain when entering to idle */ if (perf_evsel__intval(evsel, sample, "next_pid") == 0) - save_idle_callchain(itr, sample); + save_idle_callchain(sched, itr, sample); } } @@ -2849,7 +2850,7 @@ static void timehist_print_summary(struct perf_sched *sched, printf(" CPU %2d idle entire time window\n", i); } - if (sched->idle_hist && symbol_conf.use_callchain) { + if (sched->idle_hist && sched->show_callchain) { callchain_param.mode = CHAIN_FOLDED; callchain_param.value = CCVAL_PERIOD; @@ -2933,8 +2934,7 @@ static int timehist_check_attr(struct perf_sched *sched, return -1; } - if (sched->show_callchain && - !(evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) { + if (sched->show_callchain && !evsel__has_callchain(evsel)) { pr_info("Samples do not have callchains.\n"); sched->show_callchain = 0; symbol_conf.use_callchain = 0; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index e0a9845b6cbc..b5bc85bd0bbe 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -44,6 +44,7 @@ #include <sys/stat.h> #include <fcntl.h> #include <unistd.h> +#include <subcmd/pager.h> #include "sane_ctype.h" @@ -153,8 +154,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -165,8 +166,9 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD | PERF_OUTPUT_BPF_OUTPUT, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | + PERF_OUTPUT_BPF_OUTPUT, .invalid_fields = PERF_OUTPUT_TRACE, }, @@ -179,16 +181,28 @@ static struct { PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE }, + [PERF_TYPE_HW_CACHE] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, + [PERF_TYPE_RAW] = { .user_set = false, .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | - PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | - PERF_OUTPUT_PHYS_ADDR, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | + PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC | + PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -199,8 +213,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_PERIOD, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -211,8 +225,8 @@ static struct { .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | - PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | - PERF_OUTPUT_SYNTH, + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -393,9 +407,10 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_WEIGHT)) return -EINVAL; - if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { + if (PRINT_FIELD(SYM) && + !(evsel->attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { pr_err("Display of symbols requested but neither sample IP nor " - "sample address\nis selected. Hence, no addresses to convert " + "sample address\navailable. Hence, no addresses to convert " "to symbols.\n"); return -EINVAL; } @@ -404,10 +419,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected.\n"); return -EINVAL; } - if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) && - !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) { - pr_err("Display of DSO requested but no address to convert. Select\n" - "sample IP, sample address, brstack, brstacksym, or brstackoff.\n"); + if (PRINT_FIELD(DSO) && + !(evsel->attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) { + pr_err("Display of DSO requested but no address to convert.\n"); return -EINVAL; } if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) { @@ -516,7 +530,7 @@ static int perf_session__check_output_opt(struct perf_session *session) evlist__for_each_entry(session->evlist, evsel) { not_pipe = true; - if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + if (evsel__has_callchain(evsel)) { use_callchain = true; break; } @@ -531,21 +545,18 @@ static int perf_session__check_output_opt(struct perf_session *session) */ if (symbol_conf.use_callchain && !output[PERF_TYPE_TRACEPOINT].user_set) { - struct perf_event_attr *attr; - j = PERF_TYPE_TRACEPOINT; evlist__for_each_entry(session->evlist, evsel) { if (evsel->attr.type != j) continue; - attr = &evsel->attr; - - if (attr->sample_type & PERF_SAMPLE_CALLCHAIN) { + if (evsel__has_callchain(evsel)) { output[j].fields |= PERF_OUTPUT_IP; output[j].fields |= PERF_OUTPUT_SYM; + output[j].fields |= PERF_OUTPUT_SYMOFFSET; output[j].fields |= PERF_OUTPUT_DSO; - set_print_ip_opts(attr); + set_print_ip_opts(&evsel->attr); goto out; } } @@ -608,7 +619,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, if (PRINT_FIELD(COMM)) { if (latency_format) printed += fprintf(fp, "%8.8s ", thread__comm_str(thread)); - else if (PRINT_FIELD(IP) && symbol_conf.use_callchain) + else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain) printed += fprintf(fp, "%s ", thread__comm_str(thread)); else printed += fprintf(fp, "%16s ", thread__comm_str(thread)); @@ -717,8 +728,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample, if (PRINT_FIELD(DSO)) { memset(&alf, 0, sizeof(alf)); memset(&alt, 0, sizeof(alt)); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); + thread__find_map(thread, sample->cpumode, from, &alf); + thread__find_map(thread, sample->cpumode, to, &alt); } printed += fprintf(fp, " 0x%"PRIx64, from); @@ -764,13 +775,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); - if (alf.map) - alf.sym = map__find_symbol(alf.map, alf.addr); - - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); - if (alt.map) - alt.sym = map__find_symbol(alt.map, alt.addr); + thread__find_symbol(thread, sample->cpumode, from, &alf); + thread__find_symbol(thread, sample->cpumode, to, &alt); printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp); if (PRINT_FIELD(DSO)) { @@ -814,12 +820,12 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample, from = br->entries[i].from; to = br->entries[i].to; - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf); - if (alf.map && !alf.map->dso->adjust_symbols) + if (thread__find_map(thread, sample->cpumode, from, &alf) && + !alf.map->dso->adjust_symbols) from = map__map_ip(alf.map, from); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt); - if (alt.map && !alt.map->dso->adjust_symbols) + if (thread__find_map(thread, sample->cpumode, to, &alt) && + !alt.map->dso->adjust_symbols) to = map__map_ip(alt.map, to); printed += fprintf(fp, " 0x%"PRIx64, from); @@ -882,8 +888,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, return 0; } - thread__find_addr_map(thread, *cpumode, MAP__FUNCTION, start, &al); - if (!al.map || !al.map->dso) { + if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) { pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end); return 0; } @@ -908,7 +913,7 @@ static int grab_bb(u8 *buffer, u64 start, u64 end, static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, struct perf_insn *x, u8 *inbuf, int len, - int insn, FILE *fp) + int insn, FILE *fp, int *total_cycles) { int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip, dump_insn(x, ip, inbuf, len, NULL), @@ -917,7 +922,8 @@ static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en, en->flags.in_tx ? " INTX" : "", en->flags.abort ? " ABORT" : ""); if (en->flags.cycles) { - printed += fprintf(fp, " %d cycles", en->flags.cycles); + *total_cycles += en->flags.cycles; + printed += fprintf(fp, " %d cycles [%d]", en->flags.cycles, *total_cycles); if (insn) printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles); } @@ -933,10 +939,8 @@ static int ip__fprintf_sym(uint64_t addr, struct thread *thread, memset(&al, 0, sizeof(al)); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); - if (!al.map) - thread__find_addr_map(thread, cpumode, MAP__VARIABLE, - addr, &al); + thread__find_map(thread, cpumode, addr, &al); + if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end) return 0; @@ -976,6 +980,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, u8 buffer[MAXBB]; unsigned off; struct symbol *lastsym = NULL; + int total_cycles = 0; if (!(br && br->nr)) return 0; @@ -996,7 +1001,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(br->entries[nr - 1].from, thread, x.cpumode, x.cpu, &lastsym, attr, fp); printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1], - &x, buffer, len, 0, fp); + &x, buffer, len, 0, fp, &total_cycles); } /* Print all blocks */ @@ -1024,7 +1029,8 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample, printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp); if (ip == end) { - printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp); + printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp, + &total_cycles); break; } else { printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip, @@ -1102,6 +1108,35 @@ out: return printed; } +static const char *resolve_branch_sym(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al, + u64 *ip) +{ + struct addr_location addr_al; + struct perf_event_attr *attr = &evsel->attr; + const char *name = NULL; + + if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { + if (sample_addr_correlates_sym(attr)) { + thread__resolve(thread, &addr_al, sample); + if (addr_al.sym) + name = addr_al.sym->name; + else + *ip = sample->addr; + } else { + *ip = sample->addr; + } + } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { + if (al->sym) + name = al->sym->name; + else + *ip = sample->ip; + } + return name; +} + static int perf_sample__fprintf_callindent(struct perf_sample *sample, struct perf_evsel *evsel, struct thread *thread, @@ -1109,10 +1144,10 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, { struct perf_event_attr *attr = &evsel->attr; size_t depth = thread_stack__depth(thread); - struct addr_location addr_al; const char *name = NULL; static int spacing; int len = 0; + int dlen = 0; u64 ip = 0; /* @@ -1122,21 +1157,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN) depth += 1; - if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) { - if (sample_addr_correlates_sym(attr)) { - thread__resolve(thread, &addr_al, sample); - if (addr_al.sym) - name = addr_al.sym->name; - else - ip = sample->addr; - } else { - ip = sample->addr; - } - } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) { - if (al->sym) - name = al->sym->name; - else - ip = sample->ip; + name = resolve_branch_sym(sample, evsel, thread, al, &ip); + + if (PRINT_FIELD(DSO) && !(PRINT_FIELD(IP) || PRINT_FIELD(ADDR))) { + dlen += fprintf(fp, "("); + dlen += map__fprintf_dsoname(al->map, fp); + dlen += fprintf(fp, ")\t"); } if (name) @@ -1157,7 +1183,7 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, if (len < spacing) len += fprintf(fp, "%*s", spacing - len, ""); - return len; + return len + dlen; } static int perf_sample__fprintf_insn(struct perf_sample *sample, @@ -1253,6 +1279,18 @@ static struct { {0, NULL} }; +static const char *sample_flags_to_name(u32 flags) +{ + int i; + + for (i = 0; sample_flags[i].name ; i++) { + if (sample_flags[i].flags == flags) + return sample_flags[i].name; + } + + return NULL; +} + static int perf_sample__fprintf_flags(u32 flags, FILE *fp) { const char *chars = PERF_IP_FLAG_CHARS; @@ -1262,11 +1300,20 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp) char str[33]; int i, pos = 0; - for (i = 0; sample_flags[i].name ; i++) { - if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) { - name = sample_flags[i].name; - break; - } + name = sample_flags_to_name(flags & ~PERF_IP_FLAG_IN_TX); + if (name) + return fprintf(fp, " %-15s%4s ", name, in_tx ? "(x)" : ""); + + if (flags & PERF_IP_FLAG_TRACE_BEGIN) { + name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_BEGIN)); + if (name) + return fprintf(fp, " tr strt %-7s%4s ", name, in_tx ? "(x)" : ""); + } + + if (flags & PERF_IP_FLAG_TRACE_END) { + name = sample_flags_to_name(flags & ~(PERF_IP_FLAG_IN_TX | PERF_IP_FLAG_TRACE_END)); + if (name) + return fprintf(fp, " tr end %-7s%4s ", name, in_tx ? "(x)" : ""); } for (i = 0; i < n; i++, flags >>= 1) { @@ -1279,10 +1326,7 @@ static int perf_sample__fprintf_flags(u32 flags, FILE *fp) } str[pos] = 0; - if (name) - return fprintf(fp, " %-7s%4s ", name, in_tx ? "(x)" : ""); - - return fprintf(fp, " %-11s ", str); + return fprintf(fp, " %-19s ", str); } struct printer_data { @@ -1542,7 +1586,8 @@ struct metric_ctx { FILE *fp; }; -static void script_print_metric(void *ctx, const char *color, +static void script_print_metric(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color, const char *fmt, const char *unit, double val) { @@ -1560,7 +1605,8 @@ static void script_print_metric(void *ctx, const char *color, fprintf(mctx->fp, " %s\n", unit); } -static void script_new_line(void *ctx) +static void script_new_line(struct perf_stat_config *config __maybe_unused, + void *ctx) { struct metric_ctx *mctx = ctx; @@ -1606,7 +1652,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { - perf_stat__print_shadow_stats(ev2, + perf_stat__print_shadow_stats(&stat_config, ev2, evsel_script(ev2)->val, sample->cpu, &ctx, @@ -1617,6 +1663,47 @@ static void perf_sample__fprint_metric(struct perf_script *script, } } +static bool show_event(struct perf_sample *sample, + struct perf_evsel *evsel, + struct thread *thread, + struct addr_location *al) +{ + int depth = thread_stack__depth(thread); + + if (!symbol_conf.graph_function) + return true; + + if (thread->filter) { + if (depth <= thread->filter_entry_depth) { + thread->filter = false; + return false; + } + return true; + } else { + const char *s = symbol_conf.graph_function; + u64 ip; + const char *name = resolve_branch_sym(sample, evsel, thread, al, + &ip); + unsigned nlen; + + if (!name) + return false; + nlen = strlen(name); + while (*s) { + unsigned len = strcspn(s, ","); + if (nlen == len && !strncmp(name, s, len)) { + thread->filter = true; + thread->filter_entry_depth = depth; + return true; + } + s += len; + if (*s == ',') + s++; + } + return false; + } +} + static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -1631,6 +1718,9 @@ static void process_event(struct perf_script *script, if (output[type].fields == 0) return; + if (!show_event(sample, evsel, thread, al)) + return; + ++es->samples; perf_sample__fprintf_start(sample, thread, evsel, @@ -1708,6 +1798,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(METRIC)) perf_sample__fprint_metric(script, thread, evsel, sample, fp); + + if (verbose) + fflush(fp); } static struct scripting_ops *scripting_ops; @@ -1832,6 +1925,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist *evlist; struct perf_evsel *evsel, *pos; int err; + static struct perf_evsel_script *es; err = perf_event__process_attr(tool, event, pevlist); if (err) @@ -1840,6 +1934,19 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, evlist = *pevlist; evsel = perf_evlist__last(*pevlist); + if (!evsel->priv) { + if (scr->per_event_dump) { + evsel->priv = perf_evsel_script__new(evsel, + scr->session->data); + } else { + es = zalloc(sizeof(*es)); + if (!es) + return -ENOMEM; + es->fp = stdout; + evsel->priv = es; + } + } + if (evsel->attr.type >= PERF_TYPE_MAX && evsel->attr.type != PERF_TYPE_SYNTH) return 0; @@ -2473,6 +2580,8 @@ parse: output[j].fields &= ~all_output_options[i].field; else output[j].fields |= all_output_options[i].field; + output[j].user_set = true; + output[j].wildcard_set = true; } } } else { @@ -2483,7 +2592,8 @@ parse: rc = -EINVAL; goto out; } - output[type].fields |= all_output_options[i].field; + output[type].user_set = true; + output[type].wildcard_set = true; } } @@ -2947,9 +3057,8 @@ static void script__setup_sample_type(struct perf_script *script) } } -static int process_stat_round_event(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +static int process_stat_round_event(struct perf_session *session, + union perf_event *event) { struct stat_round_event *round = &event->stat_round; struct perf_evsel *counter; @@ -2963,9 +3072,8 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, return 0; } -static int process_stat_config_event(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session __maybe_unused) +static int process_stat_config_event(struct perf_session *session __maybe_unused, + union perf_event *event) { perf_event__read_stat_config(&stat_config, &event->stat_config); return 0; @@ -2991,10 +3099,10 @@ static int set_maps(struct perf_script *script) } static -int process_thread_map_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +int process_thread_map_event(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); if (script->threads) { @@ -3010,10 +3118,10 @@ int process_thread_map_event(struct perf_tool *tool, } static -int process_cpu_map_event(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session __maybe_unused) +int process_cpu_map_event(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct perf_script *script = container_of(tool, struct perf_script, tool); if (script->cpus) { @@ -3028,12 +3136,21 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +static int process_feature_event(struct perf_session *session, + union perf_event *event) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(session, event); + return 0; +} + #ifdef HAVE_AUXTRACE_SUPPORT -static int perf_script__process_auxtrace_info(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session) +static int perf_script__process_auxtrace_info(struct perf_session *session, + union perf_event *event) { - int ret = perf_event__process_auxtrace_info(tool, event, session); + struct perf_tool *tool = session->tool; + + int ret = perf_event__process_auxtrace_info(session, event); if (ret == 0) { struct perf_script *script = container_of(tool, struct perf_script, tool); @@ -3047,6 +3164,44 @@ static int perf_script__process_auxtrace_info(struct perf_tool *tool, #define perf_script__process_auxtrace_info 0 #endif +static int parse_insn_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "+insn,-event,-period", 0); + itrace_parse_synth_opts(opt, "i0ns", 0); + nanosecs = true; + return 0; +} + +static int parse_xed(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + force_pager("xed -F insn: -A -64 | less"); + return 0; +} + +static int parse_call_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); + itrace_parse_synth_opts(opt, "cewp", 0); + nanosecs = true; + return 0; +} + +static int parse_callret_trace(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0); + itrace_parse_synth_opts(opt, "crewp", 0); + nanosecs = true; + return 0; +} + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -3056,7 +3211,10 @@ int cmd_script(int argc, const char **argv) char *rec_script_path = NULL; char *rep_script_path = NULL; struct perf_session *session; - struct itrace_synth_opts itrace_synth_opts = { .set = false, }; + struct itrace_synth_opts itrace_synth_opts = { + .set = false, + .default_no_sample = true, + }; char *script_path = NULL; const char **__argv; int i, j, err = 0; @@ -3072,7 +3230,7 @@ int cmd_script(int argc, const char **argv) .attr = process_attr, .event_update = perf_event__process_event_update, .tracing_data = perf_event__process_tracing_data, - .feature = perf_event__process_feature, + .feature = process_feature_event, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_script__process_auxtrace_info, @@ -3123,13 +3281,24 @@ int cmd_script(int argc, const char **argv) "+field to add and -field to remove." "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", + "addr,symoff,srcline,period,iregs,uregs,brstack," + "brstacksym,flags,bpf-output,brstackinsn,brstackoff," + "callindent,insn,insnlen,synth,phys_addr,metric,misc", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), + OPT_CALLBACK_OPTARG(0, "insn-trace", &itrace_synth_opts, NULL, NULL, + "Decode instructions from itrace", parse_insn_trace), + OPT_CALLBACK_OPTARG(0, "xed", NULL, NULL, NULL, + "Run xed disassembler on output", parse_xed), + OPT_CALLBACK_OPTARG(0, "call-trace", &itrace_synth_opts, NULL, NULL, + "Decode calls from from itrace", parse_call_trace), + OPT_CALLBACK_OPTARG(0, "call-ret-trace", &itrace_synth_opts, NULL, NULL, + "Decode calls and returns from itrace", parse_callret_trace), + OPT_STRING(0, "graph-function", &symbol_conf.graph_function, "symbol[,symbol...]", + "Only print symbols and callees with --call-trace/--call-ret-trace"), OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]", "Stop display of callgraph at these symbols"), OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), @@ -3167,7 +3336,7 @@ int cmd_script(int argc, const char **argv) OPT_BOOLEAN(0, "ns", &nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", - "Instruction Tracing options", + "Instruction Tracing options\n" ITRACE_HELP, itrace_parse_synth_opts), OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, "Show full source file name path for source lines"), @@ -3363,8 +3532,10 @@ int cmd_script(int argc, const char **argv) exit(-1); } - if (!script_name) + if (!script_name) { setup_pager(); + use_browser = 0; + } session = perf_session__new(&data, false, &script.tool); if (session == NULL) @@ -3385,7 +3556,8 @@ int cmd_script(int argc, const char **argv) script.session = session; script__setup_sample_type(&script); - if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) + if ((output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT) || + symbol_conf.graph_function) itrace_synth_opts.thread_stack = true; session->itrace_synth_opts = &itrace_synth_opts; @@ -3403,9 +3575,9 @@ int cmd_script(int argc, const char **argv) symbol_conf.use_callchain = false; if (session->tevent.pevent && - pevent_set_function_resolver(session->tevent.pevent, - machine__resolve_kernel_addr, - &session->machines.host) < 0) { + tep_set_function_resolver(session->tevent.pevent, + machine__resolve_kernel_addr, + &session->machines.host) < 0) { pr_err("%s: failed to set libtraceevent function resolver\n", __func__); err = -1; goto out_delete; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f17dc601b0f3..a635abfa77b6 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/tool.h" #include "util/string2.h" #include "util/metricgroup.h" +#include "util/top.h" #include "asm/bug.h" #include <linux/time64.h> @@ -80,12 +81,13 @@ #include <sys/stat.h> #include <sys/wait.h> #include <unistd.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> #include "sane_ctype.h" #define DEFAULT_SEPARATOR " " -#define CNTR_NOT_SUPPORTED "<not supported>" -#define CNTR_NOT_COUNTED "<not counted>" #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" static void print_counters(struct timespec *ts, int argc, const char **argv); @@ -133,46 +135,30 @@ static const char *smi_cost_attrs = { static struct perf_evlist *evsel_list; -static struct rblist metric_events; - static struct target target = { .uid = UINT_MAX, }; -typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); +#define METRIC_ONLY_LEN 20 -static int run_count = 1; -static bool no_inherit = false; static volatile pid_t child_pid = -1; -static bool null_run = false; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; static bool smi_cost = false; static bool smi_reset = false; -static bool big_num = true; static int big_num_opt = -1; -static const char *csv_sep = NULL; -static bool csv_output = false; static bool group = false; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; -static unsigned int initial_delay = 0; -static unsigned int unit_width = 4; /* strlen("unit") */ static bool forever = false; -static bool metric_only = false; static bool force_metric_only = false; -static bool no_merge = false; static struct timespec ref_time; -static struct cpu_map *aggr_map; -static aggr_get_id_t aggr_get_id; static bool append_file; static bool interval_count; static const char *output_name; static int output_fd; -static int print_free_counters_hint; -static int print_mixed_hw_group_error; struct perf_stat { bool record; @@ -192,15 +178,15 @@ static struct perf_stat perf_stat; static volatile int done = 0; static struct perf_stat_config stat_config = { - .aggr_mode = AGGR_GLOBAL, - .scale = true, + .aggr_mode = AGGR_GLOBAL, + .scale = true, + .unit_width = 4, /* strlen("unit") */ + .run_count = 1, + .metric_only_len = METRIC_ONLY_LEN, + .walltime_nsecs_stats = &walltime_nsecs_stats, + .big_num = true, }; -static bool is_duration_time(struct perf_evsel *evsel) -{ - return !strcmp(evsel->name, "duration_time"); -} - static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -224,78 +210,6 @@ static void perf_stat__reset_stats(void) perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } -static int create_perf_stat_counter(struct perf_evsel *evsel) -{ - struct perf_event_attr *attr = &evsel->attr; - struct perf_evsel *leader = evsel->leader; - - if (stat_config.scale) { - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - } - - /* - * The event is part of non trivial group, let's enable - * the group read (for leader) and ID retrieval for all - * members. - */ - if (leader->nr_members > 1) - attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; - - attr->inherit = !no_inherit; - - /* - * Some events get initialized with sample_(period/type) set, - * like tracepoints. Clear it up for counting. - */ - attr->sample_period = 0; - - /* - * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless - * while avoiding that older tools show confusing messages. - * - * However for pipe sessions we need to keep it zero, - * because script's perf_evsel__check_attr is triggered - * by attr->sample_type != 0, and we can't run it on - * stat sessions. - */ - if (!(STAT_RECORD && perf_stat.data.is_pipe)) - attr->sample_type = PERF_SAMPLE_IDENTIFIER; - - /* - * Disabling all counters initially, they will be enabled - * either manually by us or by kernel via enable_on_exec - * set later. - */ - if (perf_evsel__is_group_leader(evsel)) { - attr->disabled = 1; - - /* - * In case of initial_delay we enable tracee - * events manually. - */ - if (target__none(&target) && !initial_delay) - attr->enable_on_exec = 1; - } - - if (target__has_cpu(&target) && !target__has_per_thread(&target)) - return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); - - return perf_evsel__open_per_thread(evsel, evsel_list->threads); -} - -/* - * Does the counter have nsecs as a unit? - */ -static inline int nsec_counter(struct perf_evsel *evsel) -{ - if (perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || - perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) - return 1; - - return 0; -} - static int process_synthesized_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -428,15 +342,15 @@ static void process_interval(void) static void enable_counters(void) { - if (initial_delay) - usleep(initial_delay * USEC_PER_MSEC); + if (stat_config.initial_delay) + usleep(stat_config.initial_delay * USEC_PER_MSEC); /* * We need to enable counters only if: * - we don't have tracee (attaching to task or cpu) * - we have initial delay configured */ - if (!target__none(&target) || initial_delay) + if (!target__none(&target) || stat_config.initial_delay) perf_evlist__enable(evsel_list); } @@ -464,112 +378,34 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static int perf_stat_synthesize_config(bool is_pipe) -{ - int err; - - if (is_pipe) { - err = perf_event__synthesize_attrs(NULL, perf_stat.session, - process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize attrs.\n"); - return err; - } - } - - err = perf_event__synthesize_extra_attr(NULL, - evsel_list, - process_synthesized_event, - is_pipe); - - err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, - process_synthesized_event, - NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus, - process_synthesized_event, NULL); - if (err < 0) { - pr_err("Couldn't synthesize thread map.\n"); - return err; - } - - err = perf_event__synthesize_stat_config(NULL, &stat_config, - process_synthesized_event, NULL); - if (err < 0) { - pr_err("Couldn't synthesize config.\n"); - return err; - } - - return 0; -} - -#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) - -static int __store_counter_ids(struct perf_evsel *counter) -{ - int cpu, thread; - - for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) { - for (thread = 0; thread < xyarray__max_y(counter->fd); - thread++) { - int fd = FD(counter, cpu, thread); - - if (perf_evlist__id_add_fd(evsel_list, counter, - cpu, thread, fd) < 0) - return -1; - } - } - - return 0; -} - -static int store_counter_ids(struct perf_evsel *counter) -{ - struct cpu_map *cpus = counter->cpus; - struct thread_map *threads = counter->threads; - - if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr)) - return -ENOMEM; - - return __store_counter_ids(counter); -} - static bool perf_evsel__should_store_id(struct perf_evsel *counter) { return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID; } -static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel) +static bool is_target_alive(struct target *_target, + struct thread_map *threads) { - struct perf_evsel *c2, *leader; - bool is_open = true; + struct stat st; + int i; - leader = evsel->leader; - pr_debug("Weak group for %s/%d failed\n", - leader->name, leader->nr_members); + if (!target__has_task(_target)) + return true; - /* - * for_each_group_member doesn't work here because it doesn't - * include the first entry. - */ - evlist__for_each_entry(evsel_list, c2) { - if (c2 == evsel) - is_open = false; - if (c2->leader == leader) { - if (is_open) - perf_evsel__close(c2); - c2->leader = c2; - c2->nr_members = 0; - } + for (i = 0; i < threads->nr; i++) { + char path[PATH_MAX]; + + scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(), + threads->map[i].pid); + + if (!stat(path, &st)) + return true; } - return leader; + + return false; } -static int __run_perf_stat(int argc, const char **argv) +static int __run_perf_stat(int argc, const char **argv, int run_idx) { int interval = stat_config.interval; int times = stat_config.times; @@ -609,13 +445,13 @@ static int __run_perf_stat(int argc, const char **argv) evlist__for_each_entry(evsel_list, counter) { try_again: - if (create_perf_stat_counter(counter) < 0) { + if (create_perf_stat_counter(counter, &stat_config, &target) < 0) { /* Weak group failed. Reset the group. */ if ((errno == EINVAL || errno == EBADF) && counter->leader != counter && counter->weak_group) { - counter = perf_evsel__reset_weak_group(counter); + counter = perf_evlist__reset_weak_group(evsel_list, counter); goto try_again; } @@ -664,11 +500,11 @@ try_again: counter->supported = true; l = strlen(counter->unit); - if (l > unit_width) - unit_width = l; + if (l > stat_config.unit_width) + stat_config.unit_width = l; if (perf_evsel__should_store_id(counter) && - store_counter_ids(counter)) + perf_evsel__store_ids(counter, evsel_list)) return -1; } @@ -699,7 +535,8 @@ try_again: if (err < 0) return err; - err = perf_stat_synthesize_config(is_pipe); + err = perf_stat_synthesize_config(&stat_config, NULL, evsel_list, + process_synthesized_event, is_pipe); if (err < 0) return err; } @@ -724,7 +561,7 @@ try_again: break; } } - waitpid(child_pid, &status, 0); + wait4(child_pid, &status, 0, &stat_config.ru_data); if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); @@ -738,6 +575,8 @@ try_again: enable_counters(); while (!done) { nanosleep(&ts, NULL); + if (!is_target_alive(&target, evsel_list->threads)) + break; if (timeout) break; if (interval) { @@ -752,6 +591,9 @@ try_again: t1 = rdclock(); + if (stat_config.walltime_run_table) + stat_config.walltime_run[run_idx] = t1 - t0; + update_stats(&walltime_nsecs_stats, t1 - t0); /* @@ -766,7 +608,7 @@ try_again: return WEXITSTATUS(status); } -static int run_perf_stat(int argc, const char **argv) +static int run_perf_stat(int argc, const char **argv, int run_idx) { int ret; @@ -779,7 +621,7 @@ static int run_perf_stat(int argc, const char **argv) if (sync_run) sync(); - ret = __run_perf_stat(argc, argv); + ret = __run_perf_stat(argc, argv, run_idx); if (ret) return ret; @@ -792,1076 +634,14 @@ static int run_perf_stat(int argc, const char **argv) return ret; } -static void print_running(u64 run, u64 ena) -{ - if (csv_output) { - fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", - csv_sep, - run, - csv_sep, - ena ? 100.0 * run / ena : 100.0); - } else if (run != ena) { - fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); - } -} - -static void print_noise_pct(double total, double avg) -{ - double pct = rel_stddev_stats(total, avg); - - if (csv_output) - fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); - else if (pct) - fprintf(stat_config.output, " ( +-%6.2f%% )", pct); -} - -static void print_noise(struct perf_evsel *evsel, double avg) -{ - struct perf_stat_evsel *ps; - - if (run_count == 1) - return; - - ps = evsel->stats; - print_noise_pct(stddev_stats(&ps->res_stats[0]), avg); -} - -static void aggr_printout(struct perf_evsel *evsel, int id, int nr) -{ - switch (stat_config.aggr_mode) { - case AGGR_CORE: - fprintf(stat_config.output, "S%d-C%*d%s%*d%s", - cpu_map__id_to_socket(id), - csv_output ? 0 : -8, - cpu_map__id_to_cpu(id), - csv_sep, - csv_output ? 0 : 4, - nr, - csv_sep); - break; - case AGGR_SOCKET: - fprintf(stat_config.output, "S%*d%s%*d%s", - csv_output ? 0 : -5, - id, - csv_sep, - csv_output ? 0 : 4, - nr, - csv_sep); - break; - case AGGR_NONE: - fprintf(stat_config.output, "CPU%*d%s", - csv_output ? 0 : -4, - perf_evsel__cpus(evsel)->map[id], csv_sep); - break; - case AGGR_THREAD: - fprintf(stat_config.output, "%*s-%*d%s", - csv_output ? 0 : 16, - thread_map__comm(evsel->threads, id), - csv_output ? 0 : -8, - thread_map__pid(evsel->threads, id), - csv_sep); - break; - case AGGR_GLOBAL: - case AGGR_UNSET: - default: - break; - } -} - -struct outstate { - FILE *fh; - bool newline; - const char *prefix; - int nfields; - int id, nr; - struct perf_evsel *evsel; -}; - -#define METRIC_LEN 35 - -static void new_line_std(void *ctx) -{ - struct outstate *os = ctx; - - os->newline = true; -} - -static void do_new_line_std(struct outstate *os) -{ - fputc('\n', os->fh); - fputs(os->prefix, os->fh); - aggr_printout(os->evsel, os->id, os->nr); - if (stat_config.aggr_mode == AGGR_NONE) - fprintf(os->fh, " "); - fprintf(os->fh, " "); -} - -static void print_metric_std(void *ctx, const char *color, const char *fmt, - const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - int n; - bool newline = os->newline; - - os->newline = false; - - if (unit == NULL || fmt == NULL) { - fprintf(out, "%-*s", METRIC_LEN, ""); - return; - } - - if (newline) - do_new_line_std(os); - - n = fprintf(out, " # "); - if (color) - n += color_fprintf(out, color, fmt, val); - else - n += fprintf(out, fmt, val); - fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); -} - -static void new_line_csv(void *ctx) -{ - struct outstate *os = ctx; - int i; - - fputc('\n', os->fh); - if (os->prefix) - fprintf(os->fh, "%s%s", os->prefix, csv_sep); - aggr_printout(os->evsel, os->id, os->nr); - for (i = 0; i < os->nfields; i++) - fputs(csv_sep, os->fh); -} - -static void print_metric_csv(void *ctx, - const char *color __maybe_unused, - const char *fmt, const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - char buf[64], *vals, *ends; - - if (unit == NULL || fmt == NULL) { - fprintf(out, "%s%s", csv_sep, csv_sep); - return; - } - snprintf(buf, sizeof(buf), fmt, val); - ends = vals = ltrim(buf); - while (isdigit(*ends) || *ends == '.') - ends++; - *ends = 0; - while (isspace(*unit)) - unit++; - fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); -} - -#define METRIC_ONLY_LEN 20 - -/* Filter out some columns that don't work well in metrics only mode */ - -static bool valid_only_metric(const char *unit) -{ - if (!unit) - return false; - if (strstr(unit, "/sec") || - strstr(unit, "hz") || - strstr(unit, "Hz") || - strstr(unit, "CPUs utilized")) - return false; - return true; -} - -static const char *fixunit(char *buf, struct perf_evsel *evsel, - const char *unit) -{ - if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), - unit); - return buf; - } - return unit; -} - -static void print_metric_only(void *ctx, const char *color, const char *fmt, - const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - int n; - char buf[1024]; - unsigned mlen = METRIC_ONLY_LEN; - - if (!valid_only_metric(unit)) - return; - unit = fixunit(buf, os->evsel, unit); - if (color) - n = color_fprintf(out, color, fmt, val); - else - n = fprintf(out, fmt, val); - if (n > METRIC_ONLY_LEN) - n = METRIC_ONLY_LEN; - if (mlen < strlen(unit)) - mlen = strlen(unit) + 1; - fprintf(out, "%*s", mlen - n, ""); -} - -static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, - const char *fmt, - const char *unit, double val) -{ - struct outstate *os = ctx; - FILE *out = os->fh; - char buf[64], *vals, *ends; - char tbuf[1024]; - - if (!valid_only_metric(unit)) - return; - unit = fixunit(tbuf, os->evsel, unit); - snprintf(buf, sizeof buf, fmt, val); - ends = vals = ltrim(buf); - while (isdigit(*ends) || *ends == '.') - ends++; - *ends = 0; - fprintf(out, "%s%s", vals, csv_sep); -} - -static void new_line_metric(void *ctx __maybe_unused) -{ -} - -static void print_metric_header(void *ctx, const char *color __maybe_unused, - const char *fmt __maybe_unused, - const char *unit, double val __maybe_unused) -{ - struct outstate *os = ctx; - char tbuf[1024]; - - if (!valid_only_metric(unit)) - return; - unit = fixunit(tbuf, os->evsel, unit); - if (csv_output) - fprintf(os->fh, "%s%s", unit, csv_sep); - else - fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); -} - -static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) -{ - FILE *output = stat_config.output; - double msecs = avg / NSEC_PER_MSEC; - const char *fmt_v, *fmt_n; - char name[25]; - - fmt_v = csv_output ? "%.6f%s" : "%18.6f%s"; - fmt_n = csv_output ? "%s" : "%-25s"; - - aggr_printout(evsel, id, nr); - - scnprintf(name, sizeof(name), "%s%s", - perf_evsel__name(evsel), csv_output ? "" : " (msec)"); - - fprintf(output, fmt_v, msecs, csv_sep); - - if (csv_output) - fprintf(output, "%s%s", evsel->unit, csv_sep); - else - fprintf(output, "%-*s%s", unit_width, evsel->unit, csv_sep); - - fprintf(output, fmt_n, name); - - if (evsel->cgrp) - fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); -} - -static int first_shadow_cpu(struct perf_evsel *evsel, int id) -{ - int i; - - if (!aggr_get_id) - return 0; - - if (stat_config.aggr_mode == AGGR_NONE) - return id; - - if (stat_config.aggr_mode == AGGR_GLOBAL) - return 0; - - for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { - int cpu2 = perf_evsel__cpus(evsel)->map[i]; - - if (aggr_get_id(evsel_list->cpus, cpu2) == id) - return cpu2; - } - return 0; -} - -static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) -{ - FILE *output = stat_config.output; - double sc = evsel->scale; - const char *fmt; - - if (csv_output) { - fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; - } else { - if (big_num) - fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; - else - fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; - } - - aggr_printout(evsel, id, nr); - - fprintf(output, fmt, avg, csv_sep); - - if (evsel->unit) - fprintf(output, "%-*s%s", - csv_output ? 0 : unit_width, - evsel->unit, csv_sep); - - fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel)); - - if (evsel->cgrp) - fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); -} - -static bool is_mixed_hw_group(struct perf_evsel *counter) -{ - struct perf_evlist *evlist = counter->evlist; - u32 pmu_type = counter->attr.type; - struct perf_evsel *pos; - - if (counter->nr_members < 2) - return false; - - evlist__for_each_entry(evlist, pos) { - /* software events can be part of any hardware group */ - if (pos->attr.type == PERF_TYPE_SOFTWARE) - continue; - if (pmu_type == PERF_TYPE_SOFTWARE) { - pmu_type = pos->attr.type; - continue; - } - if (pmu_type != pos->attr.type) - return true; - } - - return false; -} - -static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise, - struct runtime_stat *st) -{ - struct perf_stat_output_ctx out; - struct outstate os = { - .fh = stat_config.output, - .prefix = prefix ? prefix : "", - .id = id, - .nr = nr, - .evsel = counter, - }; - print_metric_t pm = print_metric_std; - void (*nl)(void *); - - if (metric_only) { - nl = new_line_metric; - if (csv_output) - pm = print_metric_only_csv; - else - pm = print_metric_only; - } else - nl = new_line_std; - - if (csv_output && !metric_only) { - static int aggr_fields[] = { - [AGGR_GLOBAL] = 0, - [AGGR_THREAD] = 1, - [AGGR_NONE] = 1, - [AGGR_SOCKET] = 2, - [AGGR_CORE] = 2, - }; - - pm = print_metric_csv; - nl = new_line_csv; - os.nfields = 3; - os.nfields += aggr_fields[stat_config.aggr_mode]; - if (counter->cgrp) - os.nfields++; - } - if (run == 0 || ena == 0 || counter->counts->scaled == -1) { - if (metric_only) { - pm(&os, NULL, "", "", 0); - return; - } - aggr_printout(counter, id, nr); - - fprintf(stat_config.output, "%*s%s", - csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - csv_sep); - - if (counter->supported) { - print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - print_mixed_hw_group_error = 1; - } - - fprintf(stat_config.output, "%-*s%s", - csv_output ? 0 : unit_width, - counter->unit, csv_sep); - - fprintf(stat_config.output, "%*s", - csv_output ? 0 : -25, - perf_evsel__name(counter)); - - if (counter->cgrp) - fprintf(stat_config.output, "%s%s", - csv_sep, counter->cgrp->name); - - if (!csv_output) - pm(&os, NULL, NULL, "", 0); - print_noise(counter, noise); - print_running(run, ena); - if (csv_output) - pm(&os, NULL, NULL, "", 0); - return; - } - - if (metric_only) - /* nothing */; - else if (nsec_counter(counter)) - nsec_printout(id, nr, counter, uval); - else - abs_printout(id, nr, counter, uval); - - out.print_metric = pm; - out.new_line = nl; - out.ctx = &os; - out.force_header = false; - - if (csv_output && !metric_only) { - print_noise(counter, noise); - print_running(run, ena); - } - - perf_stat__print_shadow_stats(counter, uval, - first_shadow_cpu(counter, id), - &out, &metric_events, st); - if (!csv_output && !metric_only) { - print_noise(counter, noise); - print_running(run, ena); - } -} - -static void aggr_update_shadow(void) -{ - int cpu, s2, id, s; - u64 val; - struct perf_evsel *counter; - - for (s = 0; s < aggr_map->nr; s++) { - id = aggr_map->map[s]; - evlist__for_each_entry(evsel_list, counter) { - val = 0; - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - s2 = aggr_get_id(evsel_list->cpus, cpu); - if (s2 != id) - continue; - val += perf_counts(counter->counts, cpu, 0)->val; - } - perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id), - &rt_stat); - } - } -} - -static void uniquify_event_name(struct perf_evsel *counter) -{ - char *new_name; - char *config; - - if (counter->uniquified_name || - !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, - strlen(counter->pmu_name))) - return; - - config = strchr(counter->name, '/'); - if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (asprintf(&new_name, - "%s [%s]", counter->name, counter->pmu_name) > 0) { - free(counter->name); - counter->name = new_name; - } - } - - counter->uniquified_name = true; -} - -static void collect_all_aliases(struct perf_evsel *counter, - void (*cb)(struct perf_evsel *counter, void *data, - bool first), - void *data) -{ - struct perf_evsel *alias; - - alias = list_prepare_entry(counter, &(evsel_list->entries), node); - list_for_each_entry_continue (alias, &evsel_list->entries, node) { - if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || - alias->scale != counter->scale || - alias->cgrp != counter->cgrp || - strcmp(alias->unit, counter->unit) || - nsec_counter(alias) != nsec_counter(counter)) - break; - alias->merged_stat = true; - cb(alias, data, false); - } -} - -static bool collect_data(struct perf_evsel *counter, - void (*cb)(struct perf_evsel *counter, void *data, - bool first), - void *data) -{ - if (counter->merged_stat) - return false; - cb(counter, data, true); - if (no_merge) - uniquify_event_name(counter); - else if (counter->auto_merge_stats) - collect_all_aliases(counter, cb, data); - return true; -} - -struct aggr_data { - u64 ena, run, val; - int id; - int nr; - int cpu; -}; - -static void aggr_cb(struct perf_evsel *counter, void *data, bool first) -{ - struct aggr_data *ad = data; - int cpu, s2; - - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - struct perf_counts_values *counts; - - s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); - if (s2 != ad->id) - continue; - if (first) - ad->nr++; - counts = perf_counts(counter->counts, cpu, 0); - /* - * When any result is bad, make them all to give - * consistent output in interval mode. - */ - if (counts->ena == 0 || counts->run == 0 || - counter->counts->scaled == -1) { - ad->ena = 0; - ad->run = 0; - break; - } - ad->val += counts->val; - ad->ena += counts->ena; - ad->run += counts->run; - } -} - -static void print_aggr(char *prefix) -{ - FILE *output = stat_config.output; - struct perf_evsel *counter; - int s, id, nr; - double uval; - u64 ena, run, val; - bool first; - - if (!(aggr_map || aggr_get_id)) - return; - - aggr_update_shadow(); - - /* - * With metric_only everything is on a single line. - * Without each counter has its own line. - */ - for (s = 0; s < aggr_map->nr; s++) { - struct aggr_data ad; - if (prefix && metric_only) - fprintf(output, "%s", prefix); - - ad.id = id = aggr_map->map[s]; - first = true; - evlist__for_each_entry(evsel_list, counter) { - if (is_duration_time(counter)) - continue; - - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(counter, aggr_cb, &ad)) - continue; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; - if (first && metric_only) { - first = false; - aggr_printout(counter, id, nr); - } - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 1.0, - &rt_stat); - if (!metric_only) - fputc('\n', output); - } - if (metric_only) - fputc('\n', output); - } -} - -static int cmp_val(const void *a, const void *b) -{ - return ((struct perf_aggr_thread_value *)b)->val - - ((struct perf_aggr_thread_value *)a)->val; -} - -static struct perf_aggr_thread_value *sort_aggr_thread( - struct perf_evsel *counter, - int nthreads, int ncpus, - int *ret) -{ - int cpu, thread, i = 0; - double uval; - struct perf_aggr_thread_value *buf; - - buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); - if (!buf) - return NULL; - - for (thread = 0; thread < nthreads; thread++) { - u64 ena = 0, run = 0, val = 0; - - for (cpu = 0; cpu < ncpus; cpu++) { - val += perf_counts(counter->counts, cpu, thread)->val; - ena += perf_counts(counter->counts, cpu, thread)->ena; - run += perf_counts(counter->counts, cpu, thread)->run; - } - - uval = val * counter->scale; - - /* - * Skip value 0 when enabling --per-thread globally, - * otherwise too many 0 output. - */ - if (uval == 0.0 && target__has_per_thread(&target)) - continue; - - buf[i].counter = counter; - buf[i].id = thread; - buf[i].uval = uval; - buf[i].val = val; - buf[i].run = run; - buf[i].ena = ena; - i++; - } - - qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); - - if (ret) - *ret = i; - - return buf; -} - -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) -{ - FILE *output = stat_config.output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int thread, sorted_threads, id; - struct perf_aggr_thread_value *buf; - - buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); - if (!buf) { - perror("cannot sort aggr thread"); - return; - } - - for (thread = 0; thread < sorted_threads; thread++) { - if (prefix) - fprintf(output, "%s", prefix); - - id = buf[thread].id; - if (stat_config.stats) - printout(id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &stat_config.stats[id]); - else - printout(id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &rt_stat); - fputc('\n', output); - } - - free(buf); -} - -struct caggr_data { - double avg, avg_enabled, avg_running; -}; - -static void counter_aggr_cb(struct perf_evsel *counter, void *data, - bool first __maybe_unused) -{ - struct caggr_data *cd = data; - struct perf_stat_evsel *ps = counter->stats; - - cd->avg += avg_stats(&ps->res_stats[0]); - cd->avg_enabled += avg_stats(&ps->res_stats[1]); - cd->avg_running += avg_stats(&ps->res_stats[2]); -} - -/* - * Print out the results of a single counter: - * aggregated counts in system-wide mode - */ -static void print_counter_aggr(struct perf_evsel *counter, char *prefix) -{ - FILE *output = stat_config.output; - double uval; - struct caggr_data cd = { .avg = 0.0 }; - - if (!collect_data(counter, counter_aggr_cb, &cd)) - return; - - if (prefix && !metric_only) - fprintf(output, "%s", prefix); - - uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, - cd.avg, &rt_stat); - if (!metric_only) - fprintf(output, "\n"); -} - -static void counter_cb(struct perf_evsel *counter, void *data, - bool first __maybe_unused) -{ - struct aggr_data *ad = data; - - ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; -} - -/* - * Print out the results of a single counter: - * does not use aggregated count in system-wide - */ -static void print_counter(struct perf_evsel *counter, char *prefix) -{ - FILE *output = stat_config.output; - u64 ena, run, val; - double uval; - int cpu; - - for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - struct aggr_data ad = { .cpu = cpu }; - - if (!collect_data(counter, counter_cb, &ad)) - return; - val = ad.val; - ena = ad.ena; - run = ad.run; - - if (prefix) - fprintf(output, "%s", prefix); - - uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); - - fputc('\n', output); - } -} - -static void print_no_aggr_metric(char *prefix) -{ - int cpu; - int nrcpus = 0; - struct perf_evsel *counter; - u64 ena, run, val; - double uval; - - nrcpus = evsel_list->cpus->nr; - for (cpu = 0; cpu < nrcpus; cpu++) { - bool first = true; - - if (prefix) - fputs(prefix, stat_config.output); - evlist__for_each_entry(evsel_list, counter) { - if (is_duration_time(counter)) - continue; - if (first) { - aggr_printout(counter, cpu, 0); - first = false; - } - val = perf_counts(counter->counts, cpu, 0)->val; - ena = perf_counts(counter->counts, cpu, 0)->ena; - run = perf_counts(counter->counts, cpu, 0)->run; - - uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); - } - fputc('\n', stat_config.output); - } -} - -static int aggr_header_lens[] = { - [AGGR_CORE] = 18, - [AGGR_SOCKET] = 12, - [AGGR_NONE] = 6, - [AGGR_THREAD] = 24, - [AGGR_GLOBAL] = 0, -}; - -static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_SOCKET] = "socket,cpus", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_GLOBAL] = "" -}; - -static void print_metric_headers(const char *prefix, bool no_indent) -{ - struct perf_stat_output_ctx out; - struct perf_evsel *counter; - struct outstate os = { - .fh = stat_config.output - }; - - if (prefix) - fprintf(stat_config.output, "%s", prefix); - - if (!csv_output && !no_indent) - fprintf(stat_config.output, "%*s", - aggr_header_lens[stat_config.aggr_mode], ""); - if (csv_output) { - if (stat_config.interval) - fputs("time,", stat_config.output); - fputs(aggr_header_csv[stat_config.aggr_mode], - stat_config.output); - } - - /* Print metrics headers only */ - evlist__for_each_entry(evsel_list, counter) { - if (is_duration_time(counter)) - continue; - os.evsel = counter; - out.ctx = &os; - out.print_metric = print_metric_header; - out.new_line = new_line_metric; - out.force_header = true; - os.evsel = counter; - perf_stat__print_shadow_stats(counter, 0, - 0, - &out, - &metric_events, - &rt_stat); - } - fputc('\n', stat_config.output); -} - -static void print_interval(char *prefix, struct timespec *ts) -{ - FILE *output = stat_config.output; - static int num_print_interval; - - sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - - if (num_print_interval == 0 && !csv_output) { - switch (stat_config.aggr_mode) { - case AGGR_SOCKET: - fprintf(output, "# time socket cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_THREAD: - fprintf(output, "# time comm-pid"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - fprintf(output, "# time"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - case AGGR_UNSET: - break; - } - } - - if (num_print_interval == 0 && metric_only) - print_metric_headers(" ", true); - if (++num_print_interval == 25) - num_print_interval = 0; -} - -static void print_header(int argc, const char **argv) -{ - FILE *output = stat_config.output; - int i; - - fflush(stdout); - - if (!csv_output) { - fprintf(output, "\n"); - fprintf(output, " Performance counter stats for "); - if (target.system_wide) - fprintf(output, "\'system wide"); - else if (target.cpu_list) - fprintf(output, "\'CPU(s) %s", target.cpu_list); - else if (!target__has_task(&target)) { - fprintf(output, "\'%s", argv ? argv[0] : "pipe"); - for (i = 1; argv && (i < argc); i++) - fprintf(output, " %s", argv[i]); - } else if (target.pid) - fprintf(output, "process id \'%s", target.pid); - else - fprintf(output, "thread id \'%s", target.tid); - - fprintf(output, "\'"); - if (run_count > 1) - fprintf(output, " (%d runs)", run_count); - fprintf(output, ":\n\n"); - } -} - -static void print_footer(void) -{ - FILE *output = stat_config.output; - int n; - - if (!null_run) - fprintf(output, "\n"); - fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC); - if (run_count > 1) { - fprintf(output, " "); - print_noise_pct(stddev_stats(&walltime_nsecs_stats), - avg_stats(&walltime_nsecs_stats)); - } - fprintf(output, "\n\n"); - - if (print_free_counters_hint && - sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && - n > 0) - fprintf(output, -"Some events weren't counted. Try disabling the NMI watchdog:\n" -" echo 0 > /proc/sys/kernel/nmi_watchdog\n" -" perf stat ...\n" -" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - - if (print_mixed_hw_group_error) - fprintf(output, - "The events in group usually have to be from " - "the same PMU. Try reorganizing the group.\n"); -} - static void print_counters(struct timespec *ts, int argc, const char **argv) { - int interval = stat_config.interval; - struct perf_evsel *counter; - char buf[64], *prefix = NULL; - /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; - if (interval) - print_interval(prefix = buf, ts); - else - print_header(argc, argv); - - if (metric_only) { - static int num_print_iv; - - if (num_print_iv == 0 && !interval) - print_metric_headers(prefix, false); - if (num_print_iv++ == 25) - num_print_iv = 0; - if (stat_config.aggr_mode == AGGR_GLOBAL && prefix) - fprintf(stat_config.output, "%s", prefix); - } - - switch (stat_config.aggr_mode) { - case AGGR_CORE: - case AGGR_SOCKET: - print_aggr(prefix); - break; - case AGGR_THREAD: - evlist__for_each_entry(evsel_list, counter) { - if (is_duration_time(counter)) - continue; - print_aggr_thread(counter, prefix); - } - break; - case AGGR_GLOBAL: - evlist__for_each_entry(evsel_list, counter) { - if (is_duration_time(counter)) - continue; - print_counter_aggr(counter, prefix); - } - if (metric_only) - fputc('\n', stat_config.output); - break; - case AGGR_NONE: - if (metric_only) - print_no_aggr_metric(prefix); - else { - evlist__for_each_entry(evsel_list, counter) { - if (is_duration_time(counter)) - continue; - print_counter(counter, prefix); - } - } - break; - case AGGR_UNSET: - default: - break; - } - - if (!interval && !csv_output) - print_footer(); - - fflush(stat_config.output); + perf_evlist__print_counters(evsel_list, &stat_config, &target, + ts, argc, argv); } static volatile int signr = -1; @@ -1918,7 +698,7 @@ static int enable_metric_only(const struct option *opt __maybe_unused, const char *s __maybe_unused, int unset) { force_metric_only = true; - metric_only = !unset; + stat_config.metric_only = !unset; return 0; } @@ -1926,7 +706,7 @@ static int parse_metric_groups(const struct option *opt, const char *str, int unset __maybe_unused) { - return metricgroup__parse_groups(opt, str, &metric_events); + return metricgroup__parse_groups(opt, str, &stat_config.metric_events); } static const struct option stat_options[] = { @@ -1937,7 +717,7 @@ static const struct option stat_options[] = { parse_events_option), OPT_CALLBACK(0, "filter", &evsel_list, "filter", "event filter", parse_filter), - OPT_BOOLEAN('i', "no-inherit", &no_inherit, + OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit, "child tasks do not inherit counters"), OPT_STRING('p', "pid", &target.pid, "pid", "stat events on existing process id"), @@ -1950,9 +730,11 @@ static const struct option stat_options[] = { OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_INTEGER('r', "repeat", &run_count, + OPT_INTEGER('r', "repeat", &stat_config.run_count, "repeat command and print average + stddev (max: 100, forever: 0)"), - OPT_BOOLEAN('n', "null", &null_run, + OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table, + "display details about each run (only with -r option)"), + OPT_BOOLEAN('n', "null", &stat_config.null_run, "null run - dont start any counters"), OPT_INCR('d', "detailed", &detailed_run, "detailed run - start a lot of events"), @@ -1965,8 +747,8 @@ static const struct option stat_options[] = { "list of cpus to monitor in system-wide"), OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, "disable CPU count aggregation", AGGR_NONE), - OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"), - OPT_STRING('x', "field-separator", &csv_sep, "separator", + OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"), + OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator", "print counts with custom separator"), OPT_CALLBACK('G', "cgroup", &evsel_list, "name", "monitor event in cgroup name only", parse_cgroups), @@ -1983,6 +765,8 @@ static const struct option stat_options[] = { "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), + OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear, + "clear screen in between new interval"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, @@ -1991,9 +775,9 @@ static const struct option stat_options[] = { "aggregate counts per physical processor core", AGGR_CORE), OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, "aggregate counts per thread", AGGR_THREAD), - OPT_UINTEGER('D', "delay", &initial_delay, + OPT_UINTEGER('D', "delay", &stat_config.initial_delay, "ms to wait before starting measurement after program start"), - OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL, + OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL, "Only print computed metrics. No raw values", enable_metric_only), OPT_BOOLEAN(0, "topdown", &topdown_run, "measure topdown level 1 statistics"), @@ -2005,12 +789,14 @@ static const struct option stat_options[] = { OPT_END() }; -static int perf_stat__get_socket(struct cpu_map *map, int cpu) +static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) { return cpu_map__get_socket(map, cpu, NULL); } -static int perf_stat__get_core(struct cpu_map *map, int cpu) +static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int cpu) { return cpu_map__get_core(map, cpu, NULL); } @@ -2027,9 +813,8 @@ static int cpu_map__get_max(struct cpu_map *map) return max; } -static struct cpu_map *cpus_aggr_map; - -static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx) +static int perf_stat__get_aggr(struct perf_stat_config *config, + aggr_get_id_t get_id, struct cpu_map *map, int idx) { int cpu; @@ -2038,20 +823,22 @@ static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int id cpu = map->map[idx]; - if (cpus_aggr_map->map[cpu] == -1) - cpus_aggr_map->map[cpu] = get_id(map, idx); + if (config->cpus_aggr_map->map[cpu] == -1) + config->cpus_aggr_map->map[cpu] = get_id(config, map, idx); - return cpus_aggr_map->map[cpu]; + return config->cpus_aggr_map->map[cpu]; } -static int perf_stat__get_socket_cached(struct cpu_map *map, int idx) +static int perf_stat__get_socket_cached(struct perf_stat_config *config, + struct cpu_map *map, int idx) { - return perf_stat__get_aggr(perf_stat__get_socket, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); } -static int perf_stat__get_core_cached(struct cpu_map *map, int idx) +static int perf_stat__get_core_cached(struct perf_stat_config *config, + struct cpu_map *map, int idx) { - return perf_stat__get_aggr(perf_stat__get_core, map, idx); + return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); } static int perf_stat_init_aggr_mode(void) @@ -2060,18 +847,18 @@ static int perf_stat_init_aggr_mode(void) switch (stat_config.aggr_mode) { case AGGR_SOCKET: - if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { + if (cpu_map__build_socket_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build socket map"); return -1; } - aggr_get_id = perf_stat__get_socket_cached; + stat_config.aggr_get_id = perf_stat__get_socket_cached; break; case AGGR_CORE: - if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) { + if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = perf_stat__get_core_cached; + stat_config.aggr_get_id = perf_stat__get_core_cached; break; case AGGR_NONE: case AGGR_GLOBAL: @@ -2087,16 +874,16 @@ static int perf_stat_init_aggr_mode(void) * the aggregation translate cpumap. */ nr = cpu_map__get_max(evsel_list->cpus); - cpus_aggr_map = cpu_map__empty_new(nr + 1); - return cpus_aggr_map ? 0 : -ENOMEM; + stat_config.cpus_aggr_map = cpu_map__empty_new(nr + 1); + return stat_config.cpus_aggr_map ? 0 : -ENOMEM; } static void perf_stat__exit_aggr_mode(void) { - cpu_map__put(aggr_map); - cpu_map__put(cpus_aggr_map); - aggr_map = NULL; - cpus_aggr_map = NULL; + cpu_map__put(stat_config.aggr_map); + cpu_map__put(stat_config.cpus_aggr_map); + stat_config.aggr_map = NULL; + stat_config.cpus_aggr_map = NULL; } static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx) @@ -2154,12 +941,14 @@ static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, return cpu_map__build_map(cpus, corep, perf_env__get_core, env); } -static int perf_stat__get_socket_file(struct cpu_map *map, int idx) +static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) { return perf_env__get_socket(map, idx, &perf_stat.session->header.env); } -static int perf_stat__get_core_file(struct cpu_map *map, int idx) +static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, + struct cpu_map *map, int idx) { return perf_env__get_core(map, idx, &perf_stat.session->header.env); } @@ -2170,18 +959,18 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) switch (stat_config.aggr_mode) { case AGGR_SOCKET: - if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) { + if (perf_env__build_socket_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build socket map"); return -1; } - aggr_get_id = perf_stat__get_socket_file; + stat_config.aggr_get_id = perf_stat__get_socket_file; break; case AGGR_CORE: - if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) { + if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { perror("cannot build core map"); return -1; } - aggr_get_id = perf_stat__get_core_file; + stat_config.aggr_get_id = perf_stat__get_core_file; break; case AGGR_NONE: case AGGR_GLOBAL: @@ -2362,13 +1151,24 @@ static int add_default_attributes(void) (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; + struct parse_events_error errinfo; /* Set attrs if no event is selected and !null_run: */ - if (null_run) + if (stat_config.null_run) return 0; if (transaction_run) { - struct parse_events_error errinfo; + /* Handle -T as -M transaction. Once platform specific metrics + * support has been added to the json files, all archictures + * will use this approach. To determine transaction support + * on an architecture test for such a metric name. + */ + if (metricgroup__has_metric("transaction")) { + struct option opt = { .value = &evsel_list }; + + return metricgroup__parse_groups(&opt, "transaction", + &stat_config.metric_events); + } if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) @@ -2380,6 +1180,7 @@ static int add_default_attributes(void) &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); + parse_events_print_error(&errinfo, transaction_attrs); return -1; } return 0; @@ -2404,11 +1205,12 @@ static int add_default_attributes(void) if (pmu_have_event("msr", "aperf") && pmu_have_event("msr", "smi")) { if (!force_metric_only) - metric_only = true; - err = parse_events(evsel_list, smi_cost_attrs, NULL); + stat_config.metric_only = true; + err = parse_events(evsel_list, smi_cost_attrs, &errinfo); } else { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); + parse_events_print_error(&errinfo, smi_cost_attrs); return -1; } if (err) { @@ -2434,7 +1236,7 @@ static int add_default_attributes(void) } if (!force_metric_only) - metric_only = true; + stat_config.metric_only = true; if (topdown_filter_events(topdown_attrs, &str, arch_topdown_check_group(&warn)) < 0) { pr_err("Out of memory\n"); @@ -2443,12 +1245,13 @@ static int add_default_attributes(void) if (topdown_attrs[0] && str) { if (warn) arch_topdown_group_warn(); - err = parse_events(evsel_list, str, NULL); + err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); free(str); + parse_events_print_error(&errinfo, str); return -1; } } else { @@ -2530,7 +1333,7 @@ static int __cmd_record(int argc, const char **argv) if (output_name) data->file.path = output_name; - if (run_count != 1 || forever) { + if (stat_config.run_count != 1 || forever) { pr_err("Cannot use -r option with perf stat record.\n"); return -1; } @@ -2549,9 +1352,8 @@ static int __cmd_record(int argc, const char **argv) return argc; } -static int process_stat_round_event(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +static int process_stat_round_event(struct perf_session *session, + union perf_event *event) { struct stat_round_event *stat_round = &event->stat_round; struct perf_evsel *counter; @@ -2576,10 +1378,10 @@ static int process_stat_round_event(struct perf_tool *tool __maybe_unused, } static -int process_stat_config_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +int process_stat_config_event(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct perf_stat *st = container_of(tool, struct perf_stat, tool); perf_event__read_stat_config(&stat_config, &event->stat_config); @@ -2619,10 +1421,10 @@ static int set_maps(struct perf_stat *st) } static -int process_thread_map_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +int process_thread_map_event(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct perf_stat *st = container_of(tool, struct perf_stat, tool); if (st->threads) { @@ -2638,10 +1440,10 @@ int process_thread_map_event(struct perf_tool *tool, } static -int process_cpu_map_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +int process_cpu_map_event(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct perf_stat *st = container_of(tool, struct perf_stat, tool); struct cpu_map *cpus; @@ -2803,12 +1605,12 @@ int cmd_stat(int argc, const char **argv) perf_stat__collect_metric_expr(evsel_list); perf_stat__init_shadow_stats(); - if (csv_sep) { - csv_output = true; - if (!strcmp(csv_sep, "\\t")) - csv_sep = "\t"; + if (stat_config.csv_sep) { + stat_config.csv_output = true; + if (!strcmp(stat_config.csv_sep, "\\t")) + stat_config.csv_sep = "\t"; } else - csv_sep = DEFAULT_SEPARATOR; + stat_config.csv_sep = DEFAULT_SEPARATOR; if (argc && !strncmp(argv[0], "rec", 3)) { argc = __cmd_record(argc, argv); @@ -2833,16 +1635,23 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (metric_only && stat_config.aggr_mode == AGGR_THREAD) { + if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) { fprintf(stderr, "--metric-only is not supported with --per-thread\n"); goto out; } - if (metric_only && run_count > 1) { + if (stat_config.metric_only && stat_config.run_count > 1) { fprintf(stderr, "--metric-only is not supported with -r\n"); goto out; } + if (stat_config.walltime_run_table && stat_config.run_count <= 1) { + fprintf(stderr, "--table is only supported with -r\n"); + parse_options_usage(stat_usage, stat_options, "r", 1); + parse_options_usage(NULL, stat_options, "table", 0); + goto out; + } + if (output_fd < 0) { fprintf(stderr, "argument to --log-fd must be a > 0\n"); parse_options_usage(stat_usage, stat_options, "log-fd", 0); @@ -2874,7 +1683,7 @@ int cmd_stat(int argc, const char **argv) /* * let the spreadsheet do the pretty-printing */ - if (csv_output) { + if (stat_config.csv_output) { /* User explicitly passed -B? */ if (big_num_opt == 1) { fprintf(stderr, "-B option not supported with -x\n"); @@ -2882,19 +1691,34 @@ int cmd_stat(int argc, const char **argv) parse_options_usage(NULL, stat_options, "x", 1); goto out; } else /* Nope, so disable big number formatting */ - big_num = false; + stat_config.big_num = false; } else if (big_num_opt == 0) /* User passed --no-big-num */ - big_num = false; + stat_config.big_num = false; setup_system_wide(argc); - if (run_count < 0) { + /* + * Display user/system times only for single + * run and when there's specified tracee. + */ + if ((stat_config.run_count == 1) && target__none(&target)) + stat_config.ru_display = true; + + if (stat_config.run_count < 0) { pr_err("Run count must be a positive number\n"); parse_options_usage(stat_usage, stat_options, "r", 1); goto out; - } else if (run_count == 0) { + } else if (stat_config.run_count == 0) { forever = true; - run_count = 1; + stat_config.run_count = 1; + } + + if (stat_config.walltime_run_table) { + stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0])); + if (!stat_config.walltime_run) { + pr_err("failed to setup -r option"); + goto out; + } } if ((stat_config.aggr_mode == AGGR_THREAD) && @@ -2994,6 +1818,17 @@ int cmd_stat(int argc, const char **argv) goto out; /* + * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless + * while avoiding that older tools show confusing messages. + * + * However for pipe sessions we need to keep it zero, + * because script's perf_evsel__check_attr is triggered + * by attr->sample_type != 0, and we can't run it on + * stat sessions. + */ + stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe); + + /* * We dont want to block the signals - that would cause * child tasks to inherit that and Ctrl-C would not work. * What we want is for Ctrl-C to work in the exec()-ed @@ -3007,12 +1842,12 @@ int cmd_stat(int argc, const char **argv) signal(SIGABRT, skip_signal); status = 0; - for (run_idx = 0; forever || run_idx < run_count; run_idx++) { - if (run_count != 1 && verbose > 0) + for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) { + if (stat_config.run_count != 1 && verbose > 0) fprintf(output, "[ perf stat: executing run #%d ... ]\n", run_idx + 1); - status = run_perf_stat(argc, argv); + status = run_perf_stat(argc, argv, run_idx); if (forever && status != -1) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); @@ -3060,6 +1895,8 @@ int cmd_stat(int argc, const char **argv) perf_stat__exit_aggr_mode(); perf_evlist__free_stats(evsel_list); out: + free(stat_config.walltime_run); + if (smi_cost && smi_reset) sysfs__write_int(FREEZE_ON_SMI_PATH, 0); diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 813698a9b8c7..a827919c6263 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -533,12 +533,8 @@ static const char *cat_backtrace(union perf_event *event, } tal.filtered = 0; - thread__find_addr_location(al.thread, cpumode, - MAP__FUNCTION, ip, &tal); - - if (tal.sym) - fprintf(f, "..... %016" PRIx64 " %s\n", ip, - tal.sym->name); + if (thread__find_symbol(al.thread, cpumode, ip, &tal)) + fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name); else fprintf(f, "..... %016" PRIx64 "\n", ip); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f39bd60d2708..aa0c73e57924 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -123,14 +123,9 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) } notes = symbol__annotation(sym); - if (notes->src != NULL) { - pthread_mutex_lock(¬es->lock); - goto out_assign; - } - pthread_mutex_lock(¬es->lock); - if (symbol__alloc_hist(sym) < 0) { + if (!symbol__hists(sym, top->evlist->nr_entries)) { pthread_mutex_unlock(¬es->lock); pr_err("Not enough memory for annotating '%s' symbol!\n", sym->name); @@ -138,9 +133,8 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, evsel, 0, NULL); + err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL); if (err == 0) { -out_assign: top->sym_filter_entry = he; } else { char msg[BUFSIZ]; @@ -188,7 +182,7 @@ static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip) static void perf_top__record_precise_ip(struct perf_top *top, struct hist_entry *he, struct perf_sample *sample, - int counter, u64 ip) + struct perf_evsel *evsel, u64 ip) { struct annotation *notes; struct symbol *sym = he->ms.sym; @@ -204,7 +198,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, if (pthread_mutex_trylock(¬es->lock)) return; - err = hist_entry__inc_addr_samples(he, sample, counter, ip); + err = hist_entry__inc_addr_samples(he, sample, evsel, ip); pthread_mutex_unlock(¬es->lock); @@ -249,10 +243,9 @@ static void perf_top__show_details(struct perf_top *top) goto out_unlock; printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name); - printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter); + printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt); - more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, - 0, top->sym_pcnt_filter, top->print_entries, 4); + more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts); if (top->evlist->enabled) { if (top->zero) @@ -314,7 +307,7 @@ static void perf_top__print_sym_table(struct perf_top *top) hists__output_recalc_col_len(hists, top->print_entries - printed); putchar('\n'); hists__fprintf(hists, false, top->print_entries - printed, win_width, - top->min_percent, stdout, symbol_conf.use_callchain); + top->min_percent, stdout, !symbol_conf.use_callchain); } static void prompt_integer(int *target, const char *msg) @@ -412,7 +405,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter); - fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter); + fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt); fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); fprintf(stdout, "\t[S] stop annotation.\n"); @@ -515,7 +508,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) prompt_integer(&top->count_filter, "Enter display event count filter"); break; case 'F': - prompt_percent(&top->sym_pcnt_filter, + prompt_percent(&top->annotation_opts.min_pcnt, "Enter details display event filter (percent)"); break; case 'K': @@ -613,7 +606,8 @@ static void *display_thread_tui(void *arg) perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, &top->session->header.env, - !top->record_opts.overwrite); + !top->record_opts.overwrite, + &top->annotation_opts); done = 1; return NULL; @@ -691,7 +685,7 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, struct perf_evsel *evsel = iter->evsel; if (perf_hpp_list.sym && single) - perf_top__record_precise_ip(top, he, iter->sample, evsel->idx, al->addr); + perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr); hist__account_cycles(iter->sample->branch_stack, al, iter->sample, !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); @@ -742,7 +736,7 @@ static void perf_event__process_sample(struct perf_tool *tool, "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", - al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? + al.map && map__has_symbols(al.map) ? " modules" : ""); if (use_browser <= 0) sleep(5); @@ -750,7 +744,7 @@ static void perf_event__process_sample(struct perf_tool *tool, machine->kptr_restrict_warned = true; } - if (al.sym == NULL) { + if (al.sym == NULL && al.map != NULL) { const char *msg = "Kernel samples will not be resolved.\n"; /* * As we do lazy loading of symtabs we only will know if the @@ -764,8 +758,7 @@ static void perf_event__process_sample(struct perf_tool *tool, * invalid --vmlinux ;-) */ if (!machine->kptr_restrict_warned && !top->vmlinux_warned && - al.map == machine->vmlinux_maps[MAP__FUNCTION] && - RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { + __map__is_kernel(al.map) && map__has_symbols(al.map)) { if (symbol_conf.vmlinux_name) { char serr[256]; dso__strerror_load(al.map->dso, serr, sizeof(serr)); @@ -1084,8 +1077,9 @@ static int __cmd_top(struct perf_top *top) if (top->session == NULL) return -1; - if (!objdump_path) { - ret = perf_env__lookup_objdump(&top->session->header.env); + if (!top->annotation_opts.objdump_path) { + ret = perf_env__lookup_objdump(&top->session->header.env, + &top->annotation_opts.objdump_path); if (ret) goto out_delete; } @@ -1140,11 +1134,6 @@ static int __cmd_top(struct perf_top *top) if (!target__none(&opts->target)) perf_evlist__enable(top->evlist); - /* Wait for a minimal set of events before starting the snapshot */ - perf_evlist__poll(top->evlist, 100); - - perf_top__mmap_read(top); - ret = -1; if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : display_thread), top)) { @@ -1162,6 +1151,11 @@ static int __cmd_top(struct perf_top *top) } } + /* Wait for a minimal set of events before starting the snapshot */ + perf_evlist__poll(top->evlist, 100); + + perf_top__mmap_read(top); + while (!done) { u64 hits = top->samples; @@ -1263,10 +1257,17 @@ int cmd_top(int argc, const char **argv) .uses_mmap = true, }, .proc_map_timeout = 500, - .overwrite = 1, + /* + * FIXME: This will lose PERF_RECORD_MMAP and other metadata + * when we pause, fix that and reenable. Probably using a + * separate evlist with a dummy event, i.e. a non-overwrite + * ring buffer just for metadata events, while PERF_RECORD_SAMPLE + * stays in overwrite mode. -acme + * */ + .overwrite = 0, }, - .max_stack = sysctl_perf_event_max_stack, - .sym_pcnt_filter = 5, + .max_stack = sysctl__max_stack(), + .annotation_opts = annotation__default_options, .nr_threads_synthesize = UINT_MAX, }; struct record_opts *opts = &top.record_opts; @@ -1348,15 +1349,15 @@ int cmd_top(int argc, const char **argv) "only consider symbols in these comms"), OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), - OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src, + OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src, "Interleave source code with assembly code (default)"), - OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw, + OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw, "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), - OPT_STRING(0, "objdump", &objdump_path, "path", + OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), - OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style", + OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style", "Specify disassembler style (e.g. -M intel for intel syntax)"), OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"), OPT_CALLBACK(0, "percent-limit", &top, "percent", @@ -1378,6 +1379,8 @@ int cmd_top(int argc, const char **argv) "Show raw trace event output (do not use print fmt or plugins)"), OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy, "Show entries in a hierarchy"), + OPT_BOOLEAN(0, "overwrite", &top.record_opts.overwrite, + "Use a backward ring buffer, default: no"), OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, "number of thread to run event synthesize"), @@ -1392,6 +1395,9 @@ int cmd_top(int argc, const char **argv) if (status < 0) return status; + top.annotation_opts.min_pcnt = 5; + top.annotation_opts.context = 4; + top.evlist = perf_evlist__new(); if (top.evlist == NULL) return -ENOMEM; @@ -1423,6 +1429,9 @@ int cmd_top(int argc, const char **argv) } } + if (opts->branch_stack && callchain_param.enabled) + symbol_conf.show_branchflag_count = true; + sort__mode = SORT_MODE__TOP; /* display thread wants entries to be collapsed in a different tree */ perf_hpp_list.need_collapse = 1; @@ -1469,8 +1478,6 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } - symbol_conf.nr_events = top.evlist->nr_entries; - if (top.delay_secs < 1) top.delay_secs = 1; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 3ad17ee89403..835619476370 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -77,7 +77,8 @@ struct trace { struct syscall *table; struct { struct perf_evsel *sys_enter, - *sys_exit; + *sys_exit, + *augmented; } events; } syscalls; struct record_opts opts; @@ -88,6 +89,8 @@ struct trace { u64 base_time; FILE *output; unsigned long nr_events; + unsigned long nr_events_printed; + unsigned long max_events; struct strlist *ev_qualifier; struct { size_t nr; @@ -105,6 +108,7 @@ struct trace { } stats; unsigned int max_stack; unsigned int min_stack; + bool raw_augmented_syscalls; bool not_ev_qualifier; bool live; bool full_time; @@ -121,7 +125,6 @@ struct trace { bool force; bool vfs_getname; int trace_pgfaults; - int open_id; }; struct tp_field { @@ -157,13 +160,11 @@ TP_UINT_FIELD__SWAPPED(16); TP_UINT_FIELD__SWAPPED(32); TP_UINT_FIELD__SWAPPED(64); -static int tp_field__init_uint(struct tp_field *field, - struct format_field *format_field, - bool needs_swap) +static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap) { - field->offset = format_field->offset; + field->offset = offset; - switch (format_field->size) { + switch (size) { case 1: field->integer = tp_field__u8; break; @@ -183,18 +184,28 @@ static int tp_field__init_uint(struct tp_field *field, return 0; } +static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap) +{ + return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap); +} + static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) { return sample->raw_data + field->offset; } -static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field) +static int __tp_field__init_ptr(struct tp_field *field, int offset) { - field->offset = format_field->offset; + field->offset = offset; field->pointer = tp_field__ptr; return 0; } +static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field) +{ + return __tp_field__init_ptr(field, format_field->offset); +} + struct syscall_tp { struct tp_field id; union { @@ -206,7 +217,7 @@ static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel, struct tp_field *field, const char *name) { - struct format_field *format_field = perf_evsel__field(evsel, name); + struct tep_format_field *format_field = perf_evsel__field(evsel, name); if (format_field == NULL) return -1; @@ -222,7 +233,7 @@ static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel, struct tp_field *field, const char *name) { - struct format_field *format_field = perf_evsel__field(evsel, name); + struct tep_format_field *format_field = perf_evsel__field(evsel, name); if (format_field == NULL) return -1; @@ -240,7 +251,54 @@ static void perf_evsel__delete_priv(struct perf_evsel *evsel) perf_evsel__delete(evsel); } -static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler) +static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel) +{ + struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp)); + + if (evsel->priv != NULL) { + if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr")) + goto out_delete; + return 0; + } + + return -ENOMEM; +out_delete: + zfree(&evsel->priv); + return -ENOENT; +} + +static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel) +{ + struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp)); + + if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */ + if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap)) + goto out_delete; + + return 0; + } + + return -ENOMEM; +out_delete: + zfree(&evsel->priv); + return -EINVAL; +} + +static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel) +{ + struct syscall_tp *sc = evsel->priv; + + return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)); +} + +static int perf_evsel__init_augmented_syscall_tp_ret(struct perf_evsel *evsel) +{ + struct syscall_tp *sc = evsel->priv; + + return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap); +} + +static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler) { evsel->priv = malloc(sizeof(struct syscall_tp)); if (evsel->priv != NULL) { @@ -258,7 +316,7 @@ out_delete: return -ENOENT; } -static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler) +static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler) { struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction); @@ -269,7 +327,7 @@ static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void if (IS_ERR(evsel)) return NULL; - if (perf_evsel__init_syscall_tp(evsel, handler)) + if (perf_evsel__init_raw_syscall_tp(evsel, handler)) goto out_delete; return evsel; @@ -291,7 +349,7 @@ size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const cha { int idx = val - sa->offset; - if (idx < 0 || idx >= sa->nr_entries) + if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) return scnprintf(bf, size, intfmt, val); return scnprintf(bf, size, "%s", sa->entries[idx]); @@ -450,16 +508,6 @@ static const char *clockid[] = { }; static DEFINE_STRARRAY(clockid); -static const char *socket_families[] = { - "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", - "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", - "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", - "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", - "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", - "ALG", "NFC", "VSOCK", -}; -static DEFINE_STRARRAY(socket_families); - static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, struct syscall_arg *arg) { @@ -567,6 +615,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, struct syscall_arg_fmt { size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg); + unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val); void *parm; const char *name; bool show_zero; @@ -583,6 +632,8 @@ static struct syscall_fmt { } syscall_fmts[] = { { .name = "access", .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, + { .name = "bind", + .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, }, }, { .name = "bpf", .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, { .name = "brk", .hexret = true, @@ -597,6 +648,8 @@ static struct syscall_fmt { [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, }, { .name = "close", .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, + { .name = "connect", + .arg = { [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, }, }, { .name = "epoll_ctl", .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, { .name = "eventfd2", @@ -674,6 +727,10 @@ static struct syscall_fmt { .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, }, + { .name = "mount", + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, + [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ + .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, { .name = "mprotect", .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, @@ -753,7 +810,8 @@ static struct syscall_fmt { { .name = "sendmsg", .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, { .name = "sendto", - .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, + .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, + [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", .arg = { [0] = STRARRAY(which, itimers), }, }, @@ -761,10 +819,12 @@ static struct syscall_fmt { .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, { .name = "socket", .arg = { [0] = STRARRAY(family, socket_families), - [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, + [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, + [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, }, { .name = "socketpair", .arg = { [0] = STRARRAY(family, socket_families), - [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, }, + [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, + [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, }, { .name = "stat", .alias = "newstat", }, { .name = "statx", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, @@ -780,6 +840,8 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, { .name = "tkill", .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, + { .name = "umount2", .alias = "umount", + .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, }, { .name = "uname", .alias = "newuname", }, { .name = "unlinkat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, @@ -803,12 +865,31 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp); } +static struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) +{ + int i, nmemb = ARRAY_SIZE(syscall_fmts); + + for (i = 0; i < nmemb; ++i) { + if (syscall_fmts[i].alias && strcmp(syscall_fmts[i].alias, alias) == 0) + return &syscall_fmts[i]; + } + + return NULL; +} + +/* + * is_exit: is this "exit" or "exit_group"? + * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. + * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc. + */ struct syscall { - struct event_format *tp_format; + struct tep_event_format *tp_format; int nr_args; - struct format_field *args; - const char *name; + int args_size; bool is_exit; + bool is_open; + struct tep_format_field *args; + const char *name; struct syscall_fmt *fmt; struct syscall_arg_fmt *arg_fmt; }; @@ -1040,11 +1121,21 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf, ttrace->filename.entry_str_pos = bf - ttrace->entry_str; } +static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) +{ + struct augmented_arg *augmented_arg = arg->augmented.args; + + return scnprintf(bf, size, "%.*s", augmented_arg->size, augmented_arg->value); +} + static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, struct syscall_arg *arg) { unsigned long ptr = arg->val; + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_string(arg, bf, size); + if (!arg->trace->vfs_getname) return scnprintf(bf, size, "%#x", ptr); @@ -1087,11 +1178,9 @@ static void sig_handler(int sig) interrupted = sig == SIGINT; } -static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, - u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) +static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp) { - size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); - printed += fprintf_duration(duration, duration_calculated, fp); + size_t printed = 0; if (trace->multiple_threads) { if (trace->show_comm) @@ -1102,6 +1191,14 @@ static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thre return printed; } +static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, + u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) +{ + size_t printed = trace__fprintf_tstamp(trace, tstamp, fp); + printed += fprintf_duration(duration, duration_calculated, fp); + return printed + trace__fprintf_comm_tid(trace, thread, fp); +} + static int trace__process_event(struct trace *trace, struct machine *machine, union perf_event *event, struct perf_sample *sample) { @@ -1203,10 +1300,12 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) static int syscall__set_arg_fmts(struct syscall *sc) { - struct format_field *field; + struct tep_format_field *field, *last_field = NULL; int idx = 0, len; for (field = sc->args; field; field = field->next, ++idx) { + last_field = field; + if (sc->fmt && sc->fmt->arg[idx].scnprintf) continue; @@ -1215,7 +1314,7 @@ static int syscall__set_arg_fmts(struct syscall *sc) strcmp(field->name, "path") == 0 || strcmp(field->name, "pathname") == 0)) sc->arg_fmt[idx].scnprintf = SCA_FILENAME; - else if (field->flags & FIELD_IS_POINTER) + else if (field->flags & TEP_FIELD_IS_POINTER) sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex; else if (strcmp(field->type, "pid_t") == 0) sc->arg_fmt[idx].scnprintf = SCA_PID; @@ -1237,6 +1336,9 @@ static int syscall__set_arg_fmts(struct syscall *sc) } } + if (last_field) + sc->args_size = last_field->offset + last_field->size; + return 0; } @@ -1297,6 +1399,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) } sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); + sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat"); return syscall__set_arg_fmts(sc); } @@ -1403,6 +1506,19 @@ static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, return scnprintf(bf, size, "arg%d: ", arg->idx); } +/* + * Check if the value is in fact zero, i.e. mask whatever needs masking, such + * as mount 'flags' argument that needs ignoring some magic flag, see comment + * in tools/perf/trace/beauty/mount_flags.c + */ +static unsigned long syscall__mask_val(struct syscall *sc, struct syscall_arg *arg, unsigned long val) +{ + if (sc->arg_fmt && sc->arg_fmt[arg->idx].mask_val) + return sc->arg_fmt[arg->idx].mask_val(arg, val); + + return val; +} + static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, struct syscall_arg *arg, unsigned long val) { @@ -1416,14 +1532,18 @@ static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size, } static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned char *args, struct trace *trace, - struct thread *thread) + unsigned char *args, void *augmented_args, int augmented_args_size, + struct trace *trace, struct thread *thread) { size_t printed = 0; unsigned long val; u8 bit = 1; struct syscall_arg arg = { .args = args, + .augmented = { + .size = augmented_args_size, + .args = augmented_args, + }, .idx = 0, .mask = 0, .trace = trace, @@ -1439,7 +1559,7 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, ttrace->ret_scnprintf = NULL; if (sc->args != NULL) { - struct format_field *field; + struct tep_format_field *field; for (field = sc->args; field; field = field->next, ++arg.idx, bit <<= 1) { @@ -1447,6 +1567,11 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, continue; val = syscall_arg__val(&arg, arg.idx); + /* + * Some syscall args need some mask, most don't and + * return val untouched. + */ + val = syscall__mask_val(sc, &arg, val); /* * Suppress this argument if its value is zero and @@ -1578,6 +1703,8 @@ static int trace__printf_interrupted_entry(struct trace *trace) printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; + ++trace->nr_events_printed; + return printed; } @@ -1598,6 +1725,32 @@ static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, return printed; } +static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, bool raw_augmented) +{ + void *augmented_args = NULL; + /* + * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter + * and there we get all 6 syscall args plus the tracepoint common + * fields (sizeof(long)) and the syscall_nr (another long). So we check + * if that is the case and if so don't look after the sc->args_size, + * but always after the full raw_syscalls:sys_enter payload, which is + * fixed. + * + * We'll revisit this later to pass s->args_size to the BPF augmenter + * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it + * copies only what we need for each syscall, like what happens when we + * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace + * traffic to just what is needed for each syscall. + */ + int args_size = raw_augmented ? (8 * (int)sizeof(long)) : sc->args_size; + + *augmented_args_size = sample->raw_size - args_size; + if (*augmented_args_size > 0) + augmented_args = sample->raw_data + args_size; + + return augmented_args; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1607,6 +1760,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, size_t printed = 0; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; + int augmented_args_size = 0; + void *augmented_args = NULL; struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -1630,13 +1785,24 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) trace__printf_interrupted_entry(trace); - + /* + * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible + * arguments, even if the syscall being handled, say "openat", uses only 4 arguments + * this breaks syscall__augmented_args() check for augmented args, as we calculate + * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file, + * so when handling, say the openat syscall, we end up getting 6 args for the + * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly + * thinking that the extra 2 u64 args are the augmented filename, so just check + * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. + */ + if (evsel != trace->syscalls.events.sys_enter) + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, - args, trace, thread); + args, augmented_args, augmented_args_size, trace, thread); if (sc->is_exit) { if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) { @@ -1659,6 +1825,39 @@ out_put: return err; } +static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample) +{ + struct thread_trace *ttrace; + struct thread *thread; + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; + struct syscall *sc = trace__syscall_info(trace, evsel, id); + char msg[1024]; + void *args, *augmented_args = NULL; + int augmented_args_size; + + if (sc == NULL) + return -1; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); + ttrace = thread__trace(thread, trace->output); + /* + * We need to get ttrace just to make sure it is there when syscall__scnprintf_args() + * and the rest of the beautifiers accessing it via struct syscall_arg touches it. + */ + if (ttrace == NULL) + goto out_put; + + args = perf_evsel__sc_tp_ptr(evsel, args, sample); + augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls); + syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); + fprintf(trace->output, "%s", msg); + err = 0; +out_put: + thread__put(thread); + return err; +} + static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel, struct perf_sample *sample, struct callchain_cursor *cursor) @@ -1667,12 +1866,14 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse int max_stack = evsel->attr.sample_max_stack ? evsel->attr.sample_max_stack : trace->max_stack; + int err; - if (machine__resolve(trace->host, &al, sample) < 0 || - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack)) + if (machine__resolve(trace->host, &al, sample) < 0) return -1; - return 0; + err = thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack); + addr_location__put(&al); + return err; } static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) @@ -1720,7 +1921,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); - if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) { + if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) { trace__set_fd_pathname(thread, ret, ttrace->filename.name); ttrace->filename.pending_open = false; ++trace->stats.vfs_getname; @@ -1797,6 +1998,13 @@ errno_print: { fputc('\n', trace->output); + /* + * We only consider an 'event' for the sake of --max-events a non-filtered + * sys_enter + sys_exit and other tracepoint events. + */ + if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX) + interrupted = true; + if (callchain_ret > 0) trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) @@ -1929,13 +2137,25 @@ static void bpf_output__fprintf(struct trace *trace, { binary__fprintf(sample->raw_data, sample->raw_size, 8, bpf_output__printer, NULL, trace->output); + ++trace->nr_events_printed; } static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) { + struct thread *thread; int callchain_ret = 0; + /* + * Check if we called perf_evsel__disable(evsel) due to, for instance, + * this event's max_events having been hit and this is an entry coming + * from the ring buffer that we should discard, since the max events + * have already been considered/printed. + */ + if (evsel->disabled) + return 0; + + thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); if (sample->callchain) { callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor); @@ -1952,16 +2172,47 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, if (trace->trace_syscalls) fprintf(trace->output, "( ): "); + if (thread) + trace__fprintf_comm_tid(trace, thread, trace->output); + + if (evsel == trace->syscalls.events.augmented) { + int id = perf_evsel__sc_tp_uint(evsel, id, sample); + struct syscall *sc = trace__syscall_info(trace, evsel, id); + + if (sc) { + fprintf(trace->output, "%s(", sc->name); + trace__fprintf_sys_enter(trace, evsel, sample); + fputc(')', trace->output); + goto newline; + } + + /* + * XXX: Not having the associated syscall info or not finding/adding + * the thread should never happen, but if it does... + * fall thru and print it as a bpf_output event. + */ + } + fprintf(trace->output, "%s:", evsel->name); if (perf_evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); } else if (evsel->tp_format) { - event_format__fprintf(evsel->tp_format, sample->cpu, - sample->raw_data, sample->raw_size, - trace->output); + if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || + trace__fprintf_sys_enter(trace, evsel, sample)) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + ++trace->nr_events_printed; + + if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { + perf_evsel__disable(evsel); + perf_evsel__close(evsel); + } + } } +newline: fprintf(trace->output, "\n"); if (callchain_ret > 0) @@ -1969,6 +2220,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); out: + thread__put(thread); return 0; } @@ -2024,8 +2276,7 @@ static int trace__pgfault(struct trace *trace, if (trace->summary_only) goto out; - thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION, - sample->ip, &al); + thread__find_symbol(thread, sample->cpumode, sample->ip, &al); trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); @@ -2037,12 +2288,10 @@ static int trace__pgfault(struct trace *trace, fprintf(trace->output, "] => "); - thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE, - sample->addr, &al); + thread__find_symbol(thread, sample->cpumode, sample->addr, &al); if (!al.map) { - thread__find_addr_location(thread, sample->cpumode, - MAP__FUNCTION, sample->addr, &al); + thread__find_symbol(thread, sample->cpumode, sample->addr, &al); if (al.map) map_type = 'x'; @@ -2058,6 +2307,8 @@ static int trace__pgfault(struct trace *trace, trace__fprintf_callchain(trace, sample); else if (callchain_ret < 0) pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel)); + + ++trace->nr_events_printed; out: err = 0; out_put: @@ -2235,6 +2486,9 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st tracepoint_handler handler = evsel->handler; handler(trace, evsel, event, sample); } + + if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX) + interrupted = true; } static int trace__add_syscall_newtp(struct trace *trace) @@ -2243,14 +2497,14 @@ static int trace__add_syscall_newtp(struct trace *trace) struct perf_evlist *evlist = trace->evlist; struct perf_evsel *sys_enter, *sys_exit; - sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); + sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter); if (sys_enter == NULL) goto out; if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) goto out_delete_sys_enter; - sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); + sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit); if (sys_exit == NULL) goto out_delete_sys_enter; @@ -2494,7 +2748,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * to override an explicitely set --max-stack global setting. */ evlist__for_each_entry(evlist, evsel) { - if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && + if (evsel__has_callchain(evsel) && evsel->attr.sample_max_stack == 0) evsel->attr.sample_max_stack = trace->max_stack; } @@ -2539,7 +2793,7 @@ next_event: int timeout = done ? 100 : -1; if (!draining && perf_evlist__poll(evlist, timeout) > 0) { - if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0) + if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) draining = true; goto again; @@ -2672,7 +2926,7 @@ static int trace__replay(struct trace *trace) "syscalls:sys_enter"); if (evsel && - (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 || + (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 || perf_evsel__init_sc_tp_ptr_field(evsel, args))) { pr_err("Error during initialize raw_syscalls:sys_enter event\n"); goto out; @@ -2684,7 +2938,7 @@ static int trace__replay(struct trace *trace) evsel = perf_evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit"); if (evsel && - (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 || + (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 || perf_evsel__init_sc_tp_uint_field(evsel, ret))) { pr_err("Error during initialize raw_syscalls:sys_exit event\n"); goto out; @@ -2924,6 +3178,36 @@ static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) evsel->handler = handler; } +static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->priv || !evsel->tp_format) + continue; + + if (strcmp(evsel->tp_format->system, "syscalls")) + continue; + + if (perf_evsel__init_syscall_tp(evsel)) + return -1; + + if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) { + struct syscall_tp *sc = evsel->priv; + + if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64))) + return -1; + } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) { + struct syscall_tp *sc = evsel->priv; + + if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap)) + return -1; + } + } + + return 0; +} + /* * XXX: Hackish, just splitting the combined -e+--event (syscalls * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use @@ -2941,6 +3225,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, int len = strlen(str) + 1, err = -1, list, idx; char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); char group_name[PATH_MAX]; + struct syscall_fmt *fmt; if (strace_groups_dir == NULL) return -1; @@ -2958,12 +3243,19 @@ static int trace__parse_events_option(const struct option *opt, const char *str, if (syscalltbl__id(trace->sctbl, s) >= 0 || syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { list = 1; + goto do_concat; + } + + fmt = syscall_fmt__find_by_alias(s); + if (fmt != NULL) { + list = 1; + s = fmt->name; } else { path__join(group_name, sizeof(group_name), strace_groups_dir, s); if (access(group_name, R_OK) == 0) list = 1; } - +do_concat: if (lists[list]) { sprintf(lists[list] + strlen(lists[list]), ",%s", s); } else { @@ -2993,6 +3285,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, if (trace__validate_ev_qualifier(trace)) goto out; + trace->trace_syscalls = true; } err = 0; @@ -3048,9 +3341,10 @@ int cmd_trace(int argc, const char **argv) }, .output = stderr, .show_comm = true, - .trace_syscalls = true, + .trace_syscalls = false, .kernel_syscallchains = false, .max_stack = UINT_MAX, + .max_events = ULONG_MAX, }; const char *output_name = NULL; const struct option trace_options[] = { @@ -3103,6 +3397,8 @@ int cmd_trace(int argc, const char **argv) &record_parse_callchain_opt), OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains, "Show the kernel callchains on the syscall exit path"), + OPT_ULONG(0, "max-events", &trace.max_events, + "Set the maximum number of events to print, exit after that is reached. "), OPT_UINTEGER(0, "min-stack", &trace.min_stack, "Set the minimum stack depth when parsing the callchain, " "anything below the specified depth will be ignored."), @@ -3123,8 +3419,9 @@ int cmd_trace(int argc, const char **argv) }; bool __maybe_unused max_stack_user_set = true; bool mmap_pages_user_set = true; + struct perf_evsel *evsel; const char * const trace_subcommands[] = { "record", NULL }; - int err; + int err = -1; char bf[BUFSIZ]; signal(SIGSEGV, sighandler_dump_stack); @@ -3147,6 +3444,16 @@ int cmd_trace(int argc, const char **argv) "cgroup monitoring only available in system-wide mode"); } + evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__"); + if (IS_ERR(evsel)) { + bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf)); + pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf); + goto out; + } + + if (evsel) + trace.syscalls.events.augmented = evsel; + err = bpf__setup_stdout(trace.evlist); if (err) { bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf)); @@ -3165,7 +3472,7 @@ int cmd_trace(int argc, const char **argv) mmap_pages_user_set = false; if (trace.max_stack == UINT_MAX) { - trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack; + trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack(); max_stack_user_set = false; } @@ -3182,8 +3489,49 @@ int cmd_trace(int argc, const char **argv) symbol_conf.use_callchain = true; } - if (trace.evlist->nr_entries > 0) + if (trace.evlist->nr_entries > 0) { evlist__set_evsel_handler(trace.evlist, trace__event_handler); + if (evlist__set_syscall_tp_fields(trace.evlist)) { + perror("failed to set syscalls:* tracepoint fields"); + goto out; + } + } + + /* + * If we are augmenting syscalls, then combine what we put in the + * __augmented_syscalls__ BPF map with what is in the + * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF, + * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit. + * + * We'll switch to look at two BPF maps, one for sys_enter and the + * other for sys_exit when we start augmenting the sys_exit paths with + * buffers that are being copied from kernel to userspace, think 'read' + * syscall. + */ + if (trace.syscalls.events.augmented) { + evsel = trace.syscalls.events.augmented; + + if (perf_evsel__init_augmented_syscall_tp(evsel) || + perf_evsel__init_augmented_syscall_tp_args(evsel)) + goto out; + evsel->handler = trace__sys_enter; + + evlist__for_each_entry(trace.evlist, evsel) { + bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0; + + if (raw_syscalls_sys_exit) { + trace.raw_augmented_syscalls = true; + goto init_augmented_syscall_tp; + } + + if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) { +init_augmented_syscall_tp: + perf_evsel__init_augmented_syscall_tp(evsel); + perf_evsel__init_augmented_syscall_tp_ret(evsel); + evsel->handler = trace__sys_exit; + } + } + } if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); @@ -3194,13 +3542,7 @@ int cmd_trace(int argc, const char **argv) if (!trace.trace_syscalls && !trace.trace_pgfaults && trace.evlist->nr_entries == 0 /* Was --events used? */) { - pr_err("Please specify something to trace.\n"); - return -1; - } - - if (!trace.trace_syscalls && trace.ev_qualifier) { - pr_err("The -e option can't be used with --no-syscalls.\n"); - goto out; + trace.trace_syscalls = true; } if (output_name != NULL) { @@ -3211,8 +3553,6 @@ int cmd_trace(int argc, const char **argv) } } - trace.open_id = syscalltbl__id(trace.sctbl, "open"); - err = target__validate(&trace.opts.target); if (err) { target__strerror(&trace.opts.target, err, bf, sizeof(bf)); diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 9aff89bc7535..9531f7bd7d9b 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -5,14 +5,17 @@ HEADERS=' include/uapi/drm/drm.h include/uapi/drm/i915_drm.h include/uapi/linux/fcntl.h +include/uapi/linux/fs.h include/uapi/linux/kcmp.h include/uapi/linux/kvm.h +include/uapi/linux/in.h include/uapi/linux/perf_event.h include/uapi/linux/prctl.h include/uapi/linux/sched.h include/uapi/linux/stat.h include/uapi/linux/vhost.h include/uapi/sound/asound.h +include/linux/bits.h include/linux/hash.h include/uapi/linux/hw_breakpoint.h arch/x86/include/asm/disabled-features.h @@ -35,6 +38,7 @@ arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h +arch/arm64/include/uapi/asm/unistd.h arch/alpha/include/uapi/asm/errno.h arch/mips/include/asm/errno.h arch/mips/include/uapi/asm/errno.h @@ -53,37 +57,53 @@ include/uapi/asm-generic/errno.h include/uapi/asm-generic/errno-base.h include/uapi/asm-generic/ioctls.h include/uapi/asm-generic/mman-common.h +include/uapi/asm-generic/unistd.h ' -check () { - file=$1 +check_2 () { + file1=$1 + file2=$2 shift - opts= - while [ -n "$*" ]; do - opts="$opts \"$1\"" - shift - done + shift - cmd="diff $opts ../$file ../../$file > /dev/null" + cmd="diff $* $file1 $file2 > /dev/null" - test -f ../../$file && - eval $cmd || echo "Warning: Kernel ABI header at 'tools/$file' differs from latest version at '$file'" >&2 + test -f $file2 && { + eval $cmd || { + echo "Warning: Kernel ABI header at '$file1' differs from latest version at '$file2'" >&2 + echo diff -u $file1 $file2 + } + } } +check () { + file=$1 + + shift + + check_2 tools/$file $file $* +} # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check # differences. test -d ../../include || exit 0 +cd ../.. + # simple diff check for i in $HEADERS; do check $i -B done # diff with extra ignore lines -check arch/x86/lib/memcpy_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -check arch/x86/lib/memset_64.S -I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -check include/uapi/asm-generic/mman.h -I "^#include <\(uapi/\)*asm-generic/mman-common.h>" -check include/uapi/linux/mman.h -I "^#include <\(uapi/\)*asm/mman.h>" +check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' +check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' +check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"' +check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' + +# diff non-symmetric files +check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl + +cd tools/perf diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 2d0caf20ff3a..bc6c585f74fc 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -30,3 +30,4 @@ perf-test mainporcelain common perf-timechart mainporcelain common perf-top mainporcelain common perf-trace mainporcelain audit +perf-version mainporcelain common diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c new file mode 100644 index 000000000000..b9c203219691 --- /dev/null +++ b/tools/perf/examples/bpf/5sec.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + Description: + + . Disable strace like syscall tracing (--no-syscalls), or try tracing + just some (-e *sleep). + + . Attach a filter function to a kernel function, returning when it should + be considered, i.e. appear on the output. + + . Run it system wide, so that any sleep of >= 5 seconds and < than 6 + seconds gets caught. + + . Ask for callgraphs using DWARF info, so that userspace can be unwound + + . While this is running, run something like "sleep 5s". + + . If we decide to add tv_nsec as well, then it becomes: + + int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec) + + I.e. add where it comes from (rqtp->tv_nsec) and where it will be + accessible in the function body (nsec) + + # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/ + 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5 + hrtimer_nanosleep ([kernel.kallsyms]) + __x64_sys_nanosleep ([kernel.kallsyms]) + do_syscall_64 ([kernel.kallsyms]) + entry_SYSCALL_64 ([kernel.kallsyms]) + __GI___nanosleep (/usr/lib64/libc-2.26.so) + rpl_nanosleep (/usr/bin/sleep) + xnanosleep (/usr/bin/sleep) + main (/usr/bin/sleep) + __libc_start_main (/usr/lib64/libc-2.26.so) + _start (/usr/bin/sleep) + ^C# + + Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com> +*/ + +#include <bpf.h> + +int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec) +{ + return sec == 5; +} + +license(GPL); diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c new file mode 100644 index 000000000000..90a19336310b --- /dev/null +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + * + * For now it just uses the existing tracepoint augmentation code in 'perf + * trace', in the next csets we'll hook up these with the sys_enter/sys_exit + * code that will combine entry/exit in a strace like way. + */ + +#include <stdio.h> +#include <linux/socket.h> + +/* bpf-output associated map */ +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define SYS_OPEN 2 +#define SYS_OPENAT 257 + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct { + struct syscall_enter_args args; + struct augmented_filename filename; + } augmented_args; + unsigned int len = sizeof(augmented_args); + const void *filename_arg = NULL; + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + /* + * Yonghong and Edward Cree sayz: + * + * https://www.spinics.net/lists/netdev/msg531645.html + * + * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 + * >> 10: (bf) r1 = r6 + * >> 11: (07) r1 += 16 + * >> 12: (05) goto pc+2 + * >> 15: (79) r3 = *(u64 *)(r1 +0) + * >> dereference of modified ctx ptr R1 off=16 disallowed + * > Aha, we at least got a different error message this time. + * > And indeed llvm has done that optimisation, rather than the more obvious + * > 11: r3 = *(u64 *)(r1 +16) + * > because it wants to have lots of reads share a single insn. You may be able + * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone + * > with llvm knowledge can figure out how to stop it (ideally, llvm would know + * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯ + * + * The optimization mostly likes below: + * + * br1: + * ... + * r1 += 16 + * goto merge + * br2: + * ... + * r1 += 20 + * goto merge + * merge: + * *(u64 *)(r1 + 0) + * + * The compiler tries to merge common loads. There is no easy way to + * stop this compiler optimization without turning off a lot of other + * optimizations. The easiest way is to add barriers: + * + * __asm__ __volatile__("": : :"memory") + * + * after the ctx memory access to prevent their down stream merging. + */ + switch (augmented_args.args.syscall_nr) { + case SYS_OPEN: filename_arg = (const void *)args->args[0]; + __asm__ __volatile__("": : :"memory"); + break; + case SYS_OPENAT: filename_arg = (const void *)args->args[1]; + break; + } + + if (filename_arg != NULL) { + augmented_args.filename.reserved = 0; + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + filename_arg); + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; + len &= sizeof(augmented_args.filename.value) - 1; + } + } else { + len = sizeof(augmented_args.args); + } + + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return 0; +} + +SEC("raw_syscalls:sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ +} + +license(GPL); diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c new file mode 100644 index 000000000000..2ae44813ef2d --- /dev/null +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -0,0 +1,173 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment syscalls with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null + * + * It'll catch some openat syscalls related to the dynamic linked and + * the last one should be the one for '/etc/passwd'. + * + * This matches what is marshalled into the raw_syscall:sys_enter payload + * expected by the 'perf trace' beautifiers, and can be used by them, that will + * check if perf_sample->raw_data is more than what is expected for each + * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the + * contents of pointer arguments. + */ + +#include <stdio.h> +#include <linux/socket.h> + +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define augmented_filename_syscall(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct augmented_filename filename; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + unsigned int len = sizeof(augmented_args); \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ + sizeof(augmented_args.filename.value), \ + args->filename_ptr); \ + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ + len &= sizeof(augmented_args.filename.value) - 1; \ + } \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ + return 0; \ +} \ +int syscall_exit(syscall)(struct syscall_exit_args *args) \ +{ \ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ +} + +struct syscall_enter_openat_args { + unsigned long long common_tp_fields; + long syscall_nr; + long dfd; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall(openat); + +struct syscall_enter_open_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall(open); + +struct syscall_enter_inotify_add_watch_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + char *filename_ptr; + long mask; +}; + +augmented_filename_syscall(inotify_add_watch); + +struct statbuf; + +struct syscall_enter_newstat_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + struct stat *statbuf; +}; + +augmented_filename_syscall(newstat); + +#ifndef _K_SS_MAXSIZE +#define _K_SS_MAXSIZE 128 +#endif + +#define augmented_sockaddr_syscall(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct sockaddr_storage addr; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + struct augmented_enter_##syscall##_args augmented_args; \ + unsigned long addrlen = sizeof(augmented_args.addr); \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ +/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ +/* if (addrlen > augmented_args.args.addrlen) */ \ +/* addrlen = augmented_args.args.addrlen; */ \ +/* */ \ + probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ + return 0; \ +} \ +int syscall_exit(syscall)(struct syscall_exit_args *args) \ +{ \ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ +} + +struct sockaddr; + +struct syscall_enter_bind_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + struct sockaddr *addr_ptr; + unsigned long addrlen; +}; + +augmented_sockaddr_syscall(bind); + +struct syscall_enter_connect_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + struct sockaddr *addr_ptr; + unsigned long addrlen; +}; + +augmented_sockaddr_syscall(connect); + +struct syscall_enter_sendto_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + void *buff; + long len; + unsigned long flags; + struct sockaddr *addr_ptr; + long addr_len; +}; + +augmented_sockaddr_syscall(sendto); + +license(GPL); diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c new file mode 100644 index 000000000000..3776d26db9e7 --- /dev/null +++ b/tools/perf/examples/bpf/empty.c @@ -0,0 +1,3 @@ +#include <bpf.h> + +license(GPL); diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c new file mode 100644 index 000000000000..b59e8812ee8c --- /dev/null +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the filename syscalls with the contents of the filename pointer argument + * filtering only those that do not start with /etc/. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null + * + * It'll catch some openat syscalls related to the dynamic linked and + * the last one should be the one for '/etc/passwd'. + * + * This matches what is marshalled into the raw_syscall:sys_enter payload + * expected by the 'perf trace' beautifiers, and can be used by them unmodified, + * which will be done as that feature is implemented in the next csets, for now + * it will appear in a dump done by the default tracepoint handler in 'perf trace', + * that uses bpf_output__fprintf() to just dump those contents, as done with + * the bpf-output event associated with the __bpf_output__ map declared in + * tools/perf/include/bpf/stdio.h. + */ + +#include <stdio.h> + +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct augmented_filename { + int size; + int reserved; + char value[64]; +}; + +#define augmented_filename_syscall_enter(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct augmented_filename filename; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + char etc[6] = "/etc/"; \ + struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ + sizeof(augmented_args.filename.value), \ + args->filename_ptr); \ + if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ + return 0; \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ + return 0; \ +} + +struct syscall_enter_openat_args { + unsigned long long common_tp_fields; + long syscall_nr; + long dfd; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall_enter(openat); + +struct syscall_enter_open_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall_enter(open); + +license(GPL); diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c new file mode 100644 index 000000000000..cf3c2fdc7f79 --- /dev/null +++ b/tools/perf/examples/bpf/hello.c @@ -0,0 +1,9 @@ +#include <stdio.h> + +int syscall_enter(openat)(void *args) +{ + puts("Hello, world\n"); + return 0; +} + +license(GPL); diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c new file mode 100644 index 000000000000..9cd124b09392 --- /dev/null +++ b/tools/perf/examples/bpf/sys_enter_openat.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hook into 'openat' syscall entry tracepoint + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/sys_enter_openat.c cat /etc/passwd > /dev/null + * + * It'll catch some openat syscalls related to the dynamic linked and + * the last one should be the one for '/etc/passwd'. + * + * The syscall_enter_openat_args can be used to get the syscall fields + * and use them for filtering calls, i.e. use in expressions for + * the return value. + */ + +#include <bpf.h> + +struct syscall_enter_openat_args { + unsigned long long unused; + long syscall_nr; + long dfd; + char *filename_ptr; + long flags; + long mode; +}; + +int syscall_enter(openat)(struct syscall_enter_openat_args *args) +{ + return 1; +} + +license(GPL); diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h new file mode 100644 index 000000000000..52b6d87fe822 --- /dev/null +++ b/tools/perf/include/bpf/bpf.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _PERF_BPF_H +#define _PERF_BPF_H + +#include <uapi/linux/bpf.h> + +/* + * A helper structure used by eBPF C program to describe map attributes to + * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h: + */ +struct bpf_map { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; + unsigned int numa_node; +}; + +#define SEC(NAME) __attribute__((section(NAME), used)) + +#define probe(function, vars) \ + SEC(#function "=" #function " " #vars) function + +#define syscall_enter(name) \ + SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name + +#define syscall_exit(name) \ + SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name + +#define license(name) \ +char _license[] SEC("license") = #name; \ +int _version SEC("version") = LINUX_VERSION_CODE; + +static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read; +static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str; + +#endif /* _PERF_BPF_H */ diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h new file mode 100644 index 000000000000..7f844568dab8 --- /dev/null +++ b/tools/perf/include/bpf/linux/socket.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_SOCKET_H +#define _UAPI_LINUX_SOCKET_H + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define _K_SS_MAXSIZE 128 /* Implementation specific max size */ +#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) + /* Implementation specific desired alignment */ + +typedef unsigned short __kernel_sa_family_t; + +struct __kernel_sockaddr_storage { + __kernel_sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ +} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ + +#define sockaddr_storage __kernel_sockaddr_storage + +#endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h new file mode 100644 index 000000000000..2899cb7bfed8 --- /dev/null +++ b/tools/perf/include/bpf/stdio.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <bpf.h> + +struct bpf_map SEC("maps") __bpf_stdout__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = + (void *)BPF_FUNC_perf_event_output; + +#define puts(from) \ + ({ const int __len = sizeof(from); \ + char __from[__len] = from; \ + perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ + &__from, __len & (sizeof(from) - 1)); }) diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c index 0c6d1002b524..f7eb63cbbc65 100644 --- a/tools/perf/jvmti/jvmti_agent.c +++ b/tools/perf/jvmti/jvmti_agent.c @@ -35,6 +35,7 @@ #include <sys/mman.h> #include <syscall.h> /* for gettid() */ #include <err.h> +#include <linux/kernel.h> #include "jvmti_agent.h" #include "../util/jitdump.h" @@ -124,7 +125,7 @@ perf_get_timestamp(void) } static int -debug_cache_init(void) +create_jit_cache_dir(void) { char str[32]; char *base, *p; @@ -143,8 +144,13 @@ debug_cache_init(void) strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm); - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/", base); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because %s/.debug/" + " is too long, please check the cwd, JITDUMPDIR, and" + " HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { @@ -153,20 +159,32 @@ debug_cache_init(void) } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit", base); + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", base); + return -1; + } ret = mkdir(jit_path, 0755); if (ret == -1) { if (errno != EEXIST) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } } - snprintf(jit_path, PATH_MAX - 1, "%s/.debug/jit/%s.XXXXXXXX", base, str); - + ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jit cache dir because" + " %s/.debug/jit/%s.XXXXXXXX is too long, please check" + " the cwd, JITDUMPDIR, and HOME variables", + base, str); + return -1; + } p = mkdtemp(jit_path); if (p != jit_path) { - warn("cannot create jit cache dir %s", jit_path); + warn("jvmti: cannot create jit cache dir %s", jit_path); return -1; } @@ -227,7 +245,7 @@ void *jvmti_open(void) { char dump_path[PATH_MAX]; struct jitheader header; - int fd; + int fd, ret; FILE *fp; init_arch_timestamp(); @@ -244,12 +262,22 @@ void *jvmti_open(void) memset(&header, 0, sizeof(header)); - debug_cache_init(); + /* + * jitdump file dir + */ + if (create_jit_cache_dir() < 0) + return NULL; /* * jitdump file name */ - snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid()); + if (ret >= PATH_MAX) { + warnx("jvmti: cannot generate jitdump file full path because" + " %s/jit-%i.dump is too long, please check the cwd," + " JITDUMPDIR, and HOME variables", jit_path, getpid()); + return NULL; + } fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666); if (fd == -1) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 20a08cb32332..a11cb006f968 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -12,7 +12,6 @@ #include "util/env.h" #include <subcmd/exec-cmd.h> #include "util/config.h" -#include "util/quote.h" #include <subcmd/run-command.h> #include "util/parse-events.h" #include <subcmd/parse-options.h> @@ -238,7 +237,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) (*argc)--; } else if (strstarts(cmd, CMD_DEBUGFS_DIR)) { tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR)); - fprintf(stderr, "dir: %s\n", tracing_path); + fprintf(stderr, "dir: %s\n", tracing_path_mount()); if (envchanged) *envchanged = 1; } else if (!strcmp(cmd, "--list-cmds")) { @@ -421,22 +420,11 @@ void pthread__unblock_sigwinch(void) pthread_sigmask(SIG_UNBLOCK, &set, NULL); } -#ifdef _SC_LEVEL1_DCACHE_LINESIZE -#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) -#else -static void cache_line_size(int *cacheline_sizep) -{ - if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep)) - pr_debug("cannot determine cache line size"); -} -#endif - int main(int argc, const char **argv) { int err; const char *cmd; char sbuf[STRERR_BUFSIZE]; - int value; /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); @@ -444,13 +432,6 @@ int main(int argc, const char **argv) /* The page_size is placed in util object. */ page_size = sysconf(_SC_PAGE_SIZE); - cache_line_size(&cacheline_size); - - if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) - sysctl_perf_event_max_stack = value; - - if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) - sysctl_perf_event_max_contexts_per_stack = value; cmd = extract_argv0_path(argv[0]); if (!cmd) @@ -458,15 +439,11 @@ int main(int argc, const char **argv) srandom(time(NULL)); - perf_config__init(); err = perf_config(perf_default_config, NULL); if (err) return err; set_buildid_dir(NULL); - /* get debugfs/tracefs mount point from /proc/mounts */ - tracing_path_mount(); - /* * "perf-xxxx" is the same as "perf xxxx", but we obviously: * diff --git a/tools/perf/perf.h b/tools/perf/perf.h index a1a97956136f..0ed4a34c74c4 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,6 +5,7 @@ #include <time.h> #include <stdbool.h> #include <linux/types.h> +#include <linux/stddef.h> #include <linux/perf_event.h> extern bool test_attr__enabled; @@ -24,7 +25,9 @@ static inline unsigned long long rdclock(void) return ts.tv_sec * 1000000000ULL + ts.tv_nsec; } +#ifndef MAX_NR_CPUS #define MAX_NR_CPUS 1024 +#endif extern const char *input_name; extern bool perf_host, perf_guest; @@ -78,6 +81,7 @@ struct record_opts { unsigned initial_delay; bool use_clockid; clockid_t clockid; + u64 clockid_res_ns; unsigned int proc_map_timeout; }; diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build index 17783913d330..215ba30b8534 100644 --- a/tools/perf/pmu-events/Build +++ b/tools/perf/pmu-events/Build @@ -1,7 +1,7 @@ hostprogs := jevents jevents-y += json.o jsmn.o jevents.o -CHOSTFLAGS_jevents.o = -I$(srctree)/tools/include +HOSTCFLAGS_jevents.o = -I$(srctree)/tools/include pmu-events-y += pmu-events.o JDIR = pmu-events/arch/$(SRCARCH) JSON = $(shell [ -d $(JDIR) ] && \ diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json new file mode 100644 index 000000000000..abc98b018446 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/branch.json @@ -0,0 +1,23 @@ +[ + { + "ArchStdEvent": "BR_IMMED_SPEC", + }, + { + "ArchStdEvent": "BR_RETURN_SPEC", + }, + { + "ArchStdEvent": "BR_INDIRECT_SPEC", + }, + { + "PublicDescription": "Mispredicted or not predicted branch speculatively executed", + "EventCode": "0x10", + "EventName": "BR_MIS_PRED", + "BriefDescription": "Branch mispredicted" + }, + { + "PublicDescription": "Predictable branch speculatively executed", + "EventCode": "0x12", + "EventName": "BR_PRED", + "BriefDescription": "Predictable branch" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json new file mode 100644 index 000000000000..687b2629e1d1 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/bus.json @@ -0,0 +1,26 @@ +[ + { + "ArchStdEvent": "BUS_ACCESS_RD", + }, + { + "ArchStdEvent": "BUS_ACCESS_WR", + }, + { + "ArchStdEvent": "BUS_ACCESS_SHARED", + }, + { + "ArchStdEvent": "BUS_ACCESS_NOT_SHARED", + }, + { + "ArchStdEvent": "BUS_ACCESS_NORMAL", + }, + { + "ArchStdEvent": "BUS_ACCESS_PERIPH", + }, + { + "PublicDescription": "Bus access", + "EventCode": "0x19", + "EventName": "BUS_ACCESS", + "BriefDescription": "Bus access" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json new file mode 100644 index 000000000000..df9201434cb6 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json @@ -0,0 +1,191 @@ +[ + { + "ArchStdEvent": "L1D_CACHE_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_WR", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_RD", + }, + { + "ArchStdEvent": "L1D_TLB_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_RD", + }, + { + "ArchStdEvent": "L2D_CACHE_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L2D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L2D_CACHE_INVAL", + }, + { + "PublicDescription": "Level 1 instruction cache refill", + "EventCode": "0x01", + "EventName": "L1I_CACHE_REFILL", + "BriefDescription": "L1I cache refill" + }, + { + "PublicDescription": "Level 1 instruction TLB refill", + "EventCode": "0x02", + "EventName": "L1I_TLB_REFILL", + "BriefDescription": "L1I TLB refill" + }, + { + "PublicDescription": "Level 1 data cache refill", + "EventCode": "0x03", + "EventName": "L1D_CACHE_REFILL", + "BriefDescription": "L1D cache refill" + }, + { + "PublicDescription": "Level 1 data cache access", + "EventCode": "0x04", + "EventName": "L1D_CACHE_ACCESS", + "BriefDescription": "L1D cache access" + }, + { + "PublicDescription": "Level 1 data TLB refill", + "EventCode": "0x05", + "EventName": "L1D_TLB_REFILL", + "BriefDescription": "L1D TLB refill" + }, + { + "PublicDescription": "Level 1 instruction cache access", + "EventCode": "0x14", + "EventName": "L1I_CACHE_ACCESS", + "BriefDescription": "L1I cache access" + }, + { + "PublicDescription": "Level 2 data cache access", + "EventCode": "0x16", + "EventName": "L2D_CACHE_ACCESS", + "BriefDescription": "L2D cache access" + }, + { + "PublicDescription": "Level 2 data refill", + "EventCode": "0x17", + "EventName": "L2D_CACHE_REFILL", + "BriefDescription": "L2D cache refill" + }, + { + "PublicDescription": "Level 2 data cache, Write-Back", + "EventCode": "0x18", + "EventName": "L2D_CACHE_WB", + "BriefDescription": "L2D cache Write-Back" + }, + { + "PublicDescription": "Level 1 data TLB access. This event counts any load or store operation which accesses the data L1 TLB", + "EventCode": "0x25", + "EventName": "L1D_TLB_ACCESS", + "BriefDescription": "L1D TLB access" + }, + { + "PublicDescription": "Level 1 instruction TLB access. This event counts any instruction fetch which accesses the instruction L1 TLB", + "EventCode": "0x26", + "EventName": "L1I_TLB_ACCESS", + "BriefDescription": "L1I TLB access" + }, + { + "PublicDescription": "Level 2 access to data TLB that caused a page table walk. This event counts on any data access which causes L2D_TLB_REFILL to count", + "EventCode": "0x34", + "EventName": "L2D_TLB_ACCESS", + "BriefDescription": "L2D TLB access" + }, + { + "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count", + "EventCode": "0x35", + "EventName": "L2I_TLB_ACCESS", + "BriefDescription": "L2D TLB access" + }, + { + "PublicDescription": "Branch target buffer misprediction", + "EventCode": "0x102", + "EventName": "BTB_MIS_PRED", + "BriefDescription": "BTB misprediction" + }, + { + "PublicDescription": "ITB miss", + "EventCode": "0x103", + "EventName": "ITB_MISS", + "BriefDescription": "ITB miss" + }, + { + "PublicDescription": "DTB miss", + "EventCode": "0x104", + "EventName": "DTB_MISS", + "BriefDescription": "DTB miss" + }, + { + "PublicDescription": "Level 1 data cache late miss", + "EventCode": "0x105", + "EventName": "L1D_CACHE_LATE_MISS", + "BriefDescription": "L1D cache late miss" + }, + { + "PublicDescription": "Level 1 data cache prefetch request", + "EventCode": "0x106", + "EventName": "L1D_CACHE_PREFETCH", + "BriefDescription": "L1D cache prefetch" + }, + { + "PublicDescription": "Level 2 data cache prefetch request", + "EventCode": "0x107", + "EventName": "L2D_CACHE_PREFETCH", + "BriefDescription": "L2D cache prefetch" + }, + { + "PublicDescription": "Level 1 stage 2 TLB refill", + "EventCode": "0x111", + "EventName": "L1_STAGE2_TLB_REFILL", + "BriefDescription": "L1 stage 2 TLB refill" + }, + { + "PublicDescription": "Page walk cache level-0 stage-1 hit", + "EventCode": "0x112", + "EventName": "PAGE_WALK_L0_STAGE1_HIT", + "BriefDescription": "Page walk, L0 stage-1 hit" + }, + { + "PublicDescription": "Page walk cache level-1 stage-1 hit", + "EventCode": "0x113", + "EventName": "PAGE_WALK_L1_STAGE1_HIT", + "BriefDescription": "Page walk, L1 stage-1 hit" + }, + { + "PublicDescription": "Page walk cache level-2 stage-1 hit", + "EventCode": "0x114", + "EventName": "PAGE_WALK_L2_STAGE1_HIT", + "BriefDescription": "Page walk, L2 stage-1 hit" + }, + { + "PublicDescription": "Page walk cache level-1 stage-2 hit", + "EventCode": "0x115", + "EventName": "PAGE_WALK_L1_STAGE2_HIT", + "BriefDescription": "Page walk, L1 stage-2 hit" + }, + { + "PublicDescription": "Page walk cache level-2 stage-2 hit", + "EventCode": "0x116", + "EventName": "PAGE_WALK_L2_STAGE2_HIT", + "BriefDescription": "Page walk, L2 stage-2 hit" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json new file mode 100644 index 000000000000..38cd1f1a70dc --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/clock.json @@ -0,0 +1,20 @@ +[ + { + "PublicDescription": "The number of core clock cycles", + "EventCode": "0x11", + "EventName": "CPU_CYCLES", + "BriefDescription": "Clock cycles" + }, + { + "PublicDescription": "FSU clocking gated off cycle", + "EventCode": "0x101", + "EventName": "FSU_CLOCK_OFF_CYCLES", + "BriefDescription": "FSU clocking gated off cycle" + }, + { + "PublicDescription": "Wait state cycle", + "EventCode": "0x110", + "EventName": "Wait_CYCLES", + "BriefDescription": "Wait state cycle" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json new file mode 100644 index 000000000000..3720dc28a15f --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/exception.json @@ -0,0 +1,50 @@ +[ + { + "ArchStdEvent": "EXC_UNDEF", + }, + { + "ArchStdEvent": "EXC_SVC", + }, + { + "ArchStdEvent": "EXC_PABORT", + }, + { + "ArchStdEvent": "EXC_DABORT", + }, + { + "ArchStdEvent": "EXC_IRQ", + }, + { + "ArchStdEvent": "EXC_FIQ", + }, + { + "ArchStdEvent": "EXC_HVC", + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT", + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT", + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER", + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ", + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ", + }, + { + "PublicDescription": "Exception taken", + "EventCode": "0x09", + "EventName": "EXC_TAKEN", + "BriefDescription": "Exception taken" + }, + { + "PublicDescription": "Instruction architecturally executed, condition check pass, exception return", + "EventCode": "0x0a", + "EventName": "EXC_RETURN", + "BriefDescription": "Exception return" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json new file mode 100644 index 000000000000..82cf753e6472 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/instruction.json @@ -0,0 +1,89 @@ +[ + { + "ArchStdEvent": "LD_SPEC", + }, + { + "ArchStdEvent": "ST_SPEC", + }, + { + "ArchStdEvent": "LDST_SPEC", + }, + { + "ArchStdEvent": "DP_SPEC", + }, + { + "ArchStdEvent": "ASE_SPEC", + }, + { + "ArchStdEvent": "VFP_SPEC", + }, + { + "ArchStdEvent": "PC_WRITE_SPEC", + }, + { + "ArchStdEvent": "CRYPTO_SPEC", + }, + { + "ArchStdEvent": "ISB_SPEC", + }, + { + "ArchStdEvent": "DSB_SPEC", + }, + { + "ArchStdEvent": "DMB_SPEC", + }, + { + "ArchStdEvent": "RC_LD_SPEC", + }, + { + "ArchStdEvent": "RC_ST_SPEC", + }, + { + "PublicDescription": "Instruction architecturally executed, software increment", + "EventCode": "0x00", + "EventName": "SW_INCR", + "BriefDescription": "Software increment" + }, + { + "PublicDescription": "Instruction architecturally executed", + "EventCode": "0x08", + "EventName": "INST_RETIRED", + "BriefDescription": "Instruction retired" + }, + { + "PublicDescription": "Instruction architecturally executed, condition code check pass, write to CONTEXTIDR", + "EventCode": "0x0b", + "EventName": "CID_WRITE_RETIRED", + "BriefDescription": "Write to CONTEXTIDR" + }, + { + "PublicDescription": "Operation speculatively executed", + "EventCode": "0x1b", + "EventName": "INST_SPEC", + "BriefDescription": "Speculatively executed" + }, + { + "PublicDescription": "Instruction architecturally executed (condition check pass), write to TTBR", + "EventCode": "0x1c", + "EventName": "TTBR_WRITE_RETIRED", + "BriefDescription": "Instruction executed, TTBR write" + }, + { + "PublicDescription": "Instruction architecturally executed, branch. This event counts all branches, taken or not. This excludes exception entries, debug entries and CCFAIL branches", + "EventCode": "0x21", + "EventName": "BR_RETIRED", + "BriefDescription": "Branch retired" + }, + { + "PublicDescription": "Instruction architecturally executed, mispredicted branch. This event counts any branch counted by BR_RETIRED which is not correctly predicted and causes a pipeline flush", + "EventCode": "0x22", + "EventName": "BR_MISPRED_RETIRED", + "BriefDescription": "Mispredicted branch retired" + }, + { + "PublicDescription": "Operation speculatively executed, NOP", + "EventCode": "0x100", + "EventName": "NOP_SPEC", + "BriefDescription": "Speculatively executed, NOP" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json new file mode 100644 index 000000000000..2aecc5c2347d --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/intrinsic.json @@ -0,0 +1,14 @@ +[ + { + "ArchStdEvent": "LDREX_SPEC", + }, + { + "ArchStdEvent": "STREX_PASS_SPEC", + }, + { + "ArchStdEvent": "STREX_FAIL_SPEC", + }, + { + "ArchStdEvent": "STREX_SPEC", + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json new file mode 100644 index 000000000000..08508697b318 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/memory.json @@ -0,0 +1,29 @@ +[ + { + "ArchStdEvent": "MEM_ACCESS_RD", + }, + { + "ArchStdEvent": "MEM_ACCESS_WR", + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC", + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC", + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC", + }, + { + "PublicDescription": "Data memory access", + "EventCode": "0x13", + "EventName": "MEM_ACCESS", + "BriefDescription": "Memory access" + }, + { + "PublicDescription": "Local memory error. This event counts any correctable or uncorrectable memory error (ECC or parity) in the protected core RAMs", + "EventCode": "0x1a", + "EventName": "MEM_ERROR", + "BriefDescription": "Memory error" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json new file mode 100644 index 000000000000..e2087de586bf --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/pipeline.json @@ -0,0 +1,50 @@ +[ + { + "PublicDescription": "Decode starved for instruction cycle", + "EventCode": "0x108", + "EventName": "DECODE_STALL", + "BriefDescription": "Decode starved" + }, + { + "PublicDescription": "Op dispatch stalled cycle", + "EventCode": "0x109", + "EventName": "DISPATCH_STALL", + "BriefDescription": "Dispatch stalled" + }, + { + "PublicDescription": "IXA Op non-issue", + "EventCode": "0x10a", + "EventName": "IXA_STALL", + "BriefDescription": "IXA stalled" + }, + { + "PublicDescription": "IXB Op non-issue", + "EventCode": "0x10b", + "EventName": "IXB_STALL", + "BriefDescription": "IXB stalled" + }, + { + "PublicDescription": "BX Op non-issue", + "EventCode": "0x10c", + "EventName": "BX_STALL", + "BriefDescription": "BX stalled" + }, + { + "PublicDescription": "LX Op non-issue", + "EventCode": "0x10d", + "EventName": "LX_STALL", + "BriefDescription": "LX stalled" + }, + { + "PublicDescription": "SX Op non-issue", + "EventCode": "0x10e", + "EventName": "SX_STALL", + "BriefDescription": "SX stalled" + }, + { + "PublicDescription": "FX Op non-issue", + "EventCode": "0x10f", + "EventName": "FX_STALL", + "BriefDescription": "FX stalled" + }, +] diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json index bc03c06c3918..752e47eb6977 100644 --- a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json +++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json @@ -12,6 +12,21 @@ "ArchStdEvent": "L1D_CACHE_REFILL_WR", }, { + "ArchStdEvent": "L1D_CACHE_REFILL_INNER", + }, + { + "ArchStdEvent": "L1D_CACHE_REFILL_OUTER", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_VICTIM", + }, + { + "ArchStdEvent": "L1D_CACHE_WB_CLEAN", + }, + { + "ArchStdEvent": "L1D_CACHE_INVAL", + }, + { "ArchStdEvent": "L1D_TLB_REFILL_RD", }, { @@ -24,9 +39,75 @@ "ArchStdEvent": "L1D_TLB_WR", }, { + "ArchStdEvent": "L2D_TLB_REFILL_RD", + }, + { + "ArchStdEvent": "L2D_TLB_REFILL_WR", + }, + { + "ArchStdEvent": "L2D_TLB_RD", + }, + { + "ArchStdEvent": "L2D_TLB_WR", + }, + { "ArchStdEvent": "BUS_ACCESS_RD", - }, - { + }, + { "ArchStdEvent": "BUS_ACCESS_WR", - } + }, + { + "ArchStdEvent": "MEM_ACCESS_RD", + }, + { + "ArchStdEvent": "MEM_ACCESS_WR", + }, + { + "ArchStdEvent": "UNALIGNED_LD_SPEC", + }, + { + "ArchStdEvent": "UNALIGNED_ST_SPEC", + }, + { + "ArchStdEvent": "UNALIGNED_LDST_SPEC", + }, + { + "ArchStdEvent": "EXC_UNDEF", + }, + { + "ArchStdEvent": "EXC_SVC", + }, + { + "ArchStdEvent": "EXC_PABORT", + }, + { + "ArchStdEvent": "EXC_DABORT", + }, + { + "ArchStdEvent": "EXC_IRQ", + }, + { + "ArchStdEvent": "EXC_FIQ", + }, + { + "ArchStdEvent": "EXC_SMC", + }, + { + "ArchStdEvent": "EXC_HVC", + }, + { + "ArchStdEvent": "EXC_TRAP_PABORT", + }, + { + "ArchStdEvent": "EXC_TRAP_DABORT", + }, + { + "ArchStdEvent": "EXC_TRAP_OTHER", + }, + { + "ArchStdEvent": "EXC_TRAP_IRQ", + }, + { + "ArchStdEvent": "EXC_TRAP_FIQ", + } ] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index f03e26ecb658..59cd8604b0bd 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -16,3 +16,4 @@ 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000480fd010,v1,hisilicon/hip08,core +0x00000000500f0000,v1,ampere/emag,core diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json index 8bf16759ca53..2dd8dafff2ef 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json @@ -1,71 +1,83 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "0", "EventName": "CPU_CYCLES", "BriefDescription": "CPU Cycles", "PublicDescription": "Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "1", "EventName": "INSTRUCTIONS", "BriefDescription": "Instructions", "PublicDescription": "Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "2", "EventName": "L1I_DIR_WRITES", "BriefDescription": "L1I Directory Writes", "PublicDescription": "Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "3", "EventName": "L1I_PENALTY_CYCLES", "BriefDescription": "L1I Penalty Cycles", "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "4", "EventName": "L1D_DIR_WRITES", "BriefDescription": "L1D Directory Writes", "PublicDescription": "Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "5", "EventName": "L1D_PENALTY_CYCLES", "BriefDescription": "L1D Penalty Cycles", "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "32", "EventName": "PROBLEM_STATE_CPU_CYCLES", "BriefDescription": "Problem-State CPU Cycles", "PublicDescription": "Problem-State Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "33", "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "34", "EventName": "PROBLEM_STATE_L1I_DIR_WRITES", "BriefDescription": "Problem-State L1I Directory Writes", "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "35", "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES", "BriefDescription": "Problem-State L1I Penalty Cycles", "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "36", "EventName": "PROBLEM_STATE_L1D_DIR_WRITES", "BriefDescription": "Problem-State L1D Directory Writes", "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "37", "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json index 7e5b72492141..db286f19e7b6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json @@ -1,95 +1,111 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "64", "EventName": "PRNG_FUNCTIONS", "BriefDescription": "PRNG Functions", "PublicDescription": "Total number of the PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "65", "EventName": "PRNG_CYCLES", "BriefDescription": "PRNG Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "66", "EventName": "PRNG_BLOCKED_FUNCTIONS", "BriefDescription": "PRNG Blocked Functions", "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "67", "EventName": "PRNG_BLOCKED_CYCLES", "BriefDescription": "PRNG Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "68", "EventName": "SHA_FUNCTIONS", "BriefDescription": "SHA Functions", "PublicDescription": "Total number of SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "69", "EventName": "SHA_CYCLES", "BriefDescription": "SHA Cycles", "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "70", "EventName": "SHA_BLOCKED_FUNCTIONS", "BriefDescription": "SHA Blocked Functions", "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "71", "EventName": "SHA_BLOCKED_CYCLES", "BriefDescription": "SHA Bloced Cycles", "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "72", "EventName": "DEA_FUNCTIONS", "BriefDescription": "DEA Functions", "PublicDescription": "Total number of the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "73", "EventName": "DEA_CYCLES", "BriefDescription": "DEA Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "74", "EventName": "DEA_BLOCKED_FUNCTIONS", "BriefDescription": "DEA Blocked Functions", "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "75", "EventName": "DEA_BLOCKED_CYCLES", "BriefDescription": "DEA Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "76", "EventName": "AES_FUNCTIONS", "BriefDescription": "AES Functions", "PublicDescription": "Total number of AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "77", "EventName": "AES_CYCLES", "BriefDescription": "AES Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "78", "EventName": "AES_BLOCKED_FUNCTIONS", "BriefDescription": "AES Blocked Functions", "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "79", "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json index 0feedb40f30f..b6b7f29ca831 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json @@ -1,107 +1,125 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "128", "EventName": "L1I_L2_SOURCED_WRITES", "BriefDescription": "L1I L2 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 (L1.5) cache" }, { + "Unit": "CPU-M-CF", "EventCode": "129", "EventName": "L1D_L2_SOURCED_WRITES", "BriefDescription": "L1D L2 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from the Level-2 (L1.5) cache" }, { + "Unit": "CPU-M-CF", "EventCode": "130", "EventName": "L1I_L3_LOCAL_WRITES", "BriefDescription": "L1I L3 Local Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from the Level-3 cache that is on the same book as the Instruction cache (Local L2 cache)" }, { + "Unit": "CPU-M-CF", "EventCode": "131", "EventName": "L1D_L3_LOCAL_WRITES", "BriefDescription": "L1D L3 Local Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installtion cache line was source from the Level-3 cache that is on the same book as the Data cache (Local L2 cache)" }, { + "Unit": "CPU-M-CF", "EventCode": "132", "EventName": "L1I_L3_REMOTE_WRITES", "BriefDescription": "L1I L3 Remote Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Instruction cache (Remote L2 cache)" }, { + "Unit": "CPU-M-CF", "EventCode": "133", "EventName": "L1D_L3_REMOTE_WRITES", "BriefDescription": "L1D L3 Remote Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Data cache (Remote L2 cache)" }, { + "Unit": "CPU-M-CF", "EventCode": "134", "EventName": "L1D_LMEM_SOURCED_WRITES", "BriefDescription": "L1D Local Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)" }, { + "Unit": "CPU-M-CF", "EventCode": "135", "EventName": "L1I_LMEM_SOURCED_WRITES", "BriefDescription": "L1I Local Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the s ame book as the Instruction cache (Local Memory)" }, { + "Unit": "CPU-M-CF", "EventCode": "136", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", "PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" }, { + "Unit": "CPU-M-CF", "EventCode": "137", "EventName": "L1I_CACHELINE_INVALIDATES", "BriefDescription": "L1I Cacheline Invalidates", "PublicDescription": "A cache line in the Level-1 I-Cache has been invalidated by a store on the same CPU as the Level-1 I-Cache" }, { + "Unit": "CPU-M-CF", "EventCode": "138", "EventName": "ITLB1_WRITES", "BriefDescription": "ITLB1 Writes", "PublicDescription": "A translation entry has been written into the Level-1 Instruction Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "139", "EventName": "DTLB1_WRITES", "BriefDescription": "DTLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "140", "EventName": "TLB2_PTE_WRITES", "BriefDescription": "TLB2 PTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "141", "EventName": "TLB2_CRSTE_WRITES", "BriefDescription": "TLB2 CRSTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "142", "EventName": "TLB2_CRSTE_HPAGE_WRITES", "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation" }, { + "Unit": "CPU-M-CF", "EventCode": "145", "EventName": "ITLB1_MISSES", "BriefDescription": "ITLB1 Misses", "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress" }, { + "Unit": "CPU-M-CF", "EventCode": "146", "EventName": "DTLB1_MISSES", "BriefDescription": "DTLB1 Misses", "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle an DTLB1 miss is in progress" }, { + "Unit": "CPU-M-CF", "EventCode": "147", "EventName": "L2C_STORES_SENT", "BriefDescription": "L2C Stores Sent", diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json index 8bf16759ca53..2dd8dafff2ef 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json @@ -1,71 +1,83 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "0", "EventName": "CPU_CYCLES", "BriefDescription": "CPU Cycles", "PublicDescription": "Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "1", "EventName": "INSTRUCTIONS", "BriefDescription": "Instructions", "PublicDescription": "Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "2", "EventName": "L1I_DIR_WRITES", "BriefDescription": "L1I Directory Writes", "PublicDescription": "Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "3", "EventName": "L1I_PENALTY_CYCLES", "BriefDescription": "L1I Penalty Cycles", "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "4", "EventName": "L1D_DIR_WRITES", "BriefDescription": "L1D Directory Writes", "PublicDescription": "Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "5", "EventName": "L1D_PENALTY_CYCLES", "BriefDescription": "L1D Penalty Cycles", "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "32", "EventName": "PROBLEM_STATE_CPU_CYCLES", "BriefDescription": "Problem-State CPU Cycles", "PublicDescription": "Problem-State Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "33", "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "34", "EventName": "PROBLEM_STATE_L1I_DIR_WRITES", "BriefDescription": "Problem-State L1I Directory Writes", "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "35", "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES", "BriefDescription": "Problem-State L1I Penalty Cycles", "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "36", "EventName": "PROBLEM_STATE_L1D_DIR_WRITES", "BriefDescription": "Problem-State L1D Directory Writes", "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "37", "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json index 7e5b72492141..db286f19e7b6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json @@ -1,95 +1,111 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "64", "EventName": "PRNG_FUNCTIONS", "BriefDescription": "PRNG Functions", "PublicDescription": "Total number of the PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "65", "EventName": "PRNG_CYCLES", "BriefDescription": "PRNG Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "66", "EventName": "PRNG_BLOCKED_FUNCTIONS", "BriefDescription": "PRNG Blocked Functions", "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "67", "EventName": "PRNG_BLOCKED_CYCLES", "BriefDescription": "PRNG Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "68", "EventName": "SHA_FUNCTIONS", "BriefDescription": "SHA Functions", "PublicDescription": "Total number of SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "69", "EventName": "SHA_CYCLES", "BriefDescription": "SHA Cycles", "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "70", "EventName": "SHA_BLOCKED_FUNCTIONS", "BriefDescription": "SHA Blocked Functions", "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "71", "EventName": "SHA_BLOCKED_CYCLES", "BriefDescription": "SHA Bloced Cycles", "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "72", "EventName": "DEA_FUNCTIONS", "BriefDescription": "DEA Functions", "PublicDescription": "Total number of the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "73", "EventName": "DEA_CYCLES", "BriefDescription": "DEA Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "74", "EventName": "DEA_BLOCKED_FUNCTIONS", "BriefDescription": "DEA Blocked Functions", "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "75", "EventName": "DEA_BLOCKED_CYCLES", "BriefDescription": "DEA Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "76", "EventName": "AES_FUNCTIONS", "BriefDescription": "AES Functions", "PublicDescription": "Total number of AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "77", "EventName": "AES_CYCLES", "BriefDescription": "AES Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "78", "EventName": "AES_BLOCKED_FUNCTIONS", "BriefDescription": "AES Blocked Functions", "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "79", "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json index 9a002b6967f1..436ce33f1182 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json @@ -1,335 +1,391 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "128", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line." }, { + "Unit": "CPU-M-CF", "EventCode": "129", "EventName": "DTLB1_WRITES", "BriefDescription": "DTLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "130", "EventName": "DTLB1_MISSES", "BriefDescription": "DTLB1 Misses", "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress." }, { + "Unit": "CPU-M-CF", "EventCode": "131", "EventName": "DTLB1_HPAGE_WRITES", "BriefDescription": "DTLB1 One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page" }, { + "Unit": "CPU-M-CF", "EventCode": "132", "EventName": "DTLB1_GPAGE_WRITES", "BriefDescription": "DTLB1 Two-Gigabyte Page Writes", "PublicDescription": "Counter:132 Name:DTLB1_GPAGE_WRITES A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a two-gigabyte page." }, { + "Unit": "CPU-M-CF", "EventCode": "133", "EventName": "L1D_L2D_SOURCED_WRITES", "BriefDescription": "L1D L2D Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache" }, { + "Unit": "CPU-M-CF", "EventCode": "134", "EventName": "ITLB1_WRITES", "BriefDescription": "ITLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "135", "EventName": "ITLB1_MISSES", "BriefDescription": "ITLB1 Misses", "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress" }, { + "Unit": "CPU-M-CF", "EventCode": "136", "EventName": "L1I_L2I_SOURCED_WRITES", "BriefDescription": "L1I L2I Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache" }, { + "Unit": "CPU-M-CF", "EventCode": "137", "EventName": "TLB2_PTE_WRITES", "BriefDescription": "TLB2 PTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "138", "EventName": "TLB2_CRSTE_HPAGE_WRITES", "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays for a one-megabyte large page translation" }, { + "Unit": "CPU-M-CF", "EventCode": "139", "EventName": "TLB2_CRSTE_WRITES", "BriefDescription": "TLB2 CRSTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "140", "EventName": "TX_C_TEND", "BriefDescription": "Completed TEND instructions in constrained TX mode", "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "141", "EventName": "TX_NC_TEND", "BriefDescription": "Completed TEND instructions in non-constrained TX mode", "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "143", "EventName": "L1C_TLB1_MISSES", "BriefDescription": "L1C TLB1 Misses", "PublicDescription": "Increments by one for any cycle where a Level-1 cache or Level-1 TLB miss is in progress." }, { + "Unit": "CPU-M-CF", "EventCode": "144", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "145", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "146", "EventName": "L1D_ONNODE_L4_SOURCED_WRITES", "BriefDescription": "L1D On-Node L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "147", "EventName": "L1D_ONNODE_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D On-Node L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "148", "EventName": "L1D_ONNODE_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Node L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "149", "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES", "BriefDescription": "L1D On-Drawer L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "150", "EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D On-Drawer L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "151", "EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Drawer L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "152", "EventName": "L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer Same-Column L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "153", "EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "154", "EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "155", "EventName": "L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "156", "EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "157", "EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "158", "EventName": "L1D_ONNODE_MEM_SOURCED_WRITES", "BriefDescription": "L1D On-Node Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Node memory" }, { + "Unit": "CPU-M-CF", "EventCode": "159", "EventName": "L1D_ONDRAWER_MEM_SOURCED_WRITES", "BriefDescription": "L1D On-Drawer Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory" }, { + "Unit": "CPU-M-CF", "EventCode": "160", "EventName": "L1D_OFFDRAWER_MEM_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory" }, { + "Unit": "CPU-M-CF", "EventCode": "161", "EventName": "L1D_ONCHIP_MEM_SOURCED_WRITES", "BriefDescription": "L1D On-Chip Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory" }, { + "Unit": "CPU-M-CF", "EventCode": "162", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "163", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "164", "EventName": "L1I_ONNODE_L4_SOURCED_WRITES", "BriefDescription": "L1I On-Chip L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "165", "EventName": "L1I_ONNODE_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I On-Node L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "166", "EventName": "L1I_ONNODE_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Node L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "167", "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES", "BriefDescription": "L1I On-Drawer L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "168", "EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I On-Drawer L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "169", "EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Drawer L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "170", "EventName": "L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer Same-Column L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "171", "EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "172", "EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "173", "EventName": "L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer Far-Column L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "174", "EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "175", "EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "176", "EventName": "L1I_ONNODE_MEM_SOURCED_WRITES", "BriefDescription": "L1I On-Node Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Node memory" }, { + "Unit": "CPU-M-CF", "EventCode": "177", "EventName": "L1I_ONDRAWER_MEM_SOURCED_WRITES", "BriefDescription": "L1I On-Drawer Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory" }, { + "Unit": "CPU-M-CF", "EventCode": "178", "EventName": "L1I_OFFDRAWER_MEM_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory" }, { + "Unit": "CPU-M-CF", "EventCode": "179", "EventName": "L1I_ONCHIP_MEM_SOURCED_WRITES", "BriefDescription": "L1I On-Chip Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory" }, { + "Unit": "CPU-M-CF", "EventCode": "218", "EventName": "TX_NC_TABORT", "BriefDescription": "Aborted transactions in non-constrained TX mode", "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "219", "EventName": "TX_C_TABORT_NO_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete" }, { + "Unit": "CPU-M-CF", "EventCode": "220", "EventName": "TX_C_TABORT_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" }, { + "Unit": "CPU-M-CF", "EventCode": "448", "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE", "BriefDescription": "Cycle count with one thread active", "PublicDescription": "Cycle count with one thread active" }, { + "Unit": "CPU-M-CF", "EventCode": "449", "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json new file mode 100644 index 000000000000..1a0034f79f73 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json @@ -0,0 +1,7 @@ +[ + { + "BriefDescription": "Transaction count", + "MetricName": "transaction", + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json index 8f653c9d899d..17fb5241928b 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json @@ -1,47 +1,55 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "0", "EventName": "CPU_CYCLES", "BriefDescription": "CPU Cycles", "PublicDescription": "Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "1", "EventName": "INSTRUCTIONS", "BriefDescription": "Instructions", "PublicDescription": "Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "2", "EventName": "L1I_DIR_WRITES", "BriefDescription": "L1I Directory Writes", "PublicDescription": "Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "3", "EventName": "L1I_PENALTY_CYCLES", "BriefDescription": "L1I Penalty Cycles", "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "4", "EventName": "L1D_DIR_WRITES", "BriefDescription": "L1D Directory Writes", "PublicDescription": "Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "5", "EventName": "L1D_PENALTY_CYCLES", "BriefDescription": "L1D Penalty Cycles", "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "32", "EventName": "PROBLEM_STATE_CPU_CYCLES", "BriefDescription": "Problem-State CPU Cycles", "PublicDescription": "Problem-State Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "33", "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json index 7e5b72492141..db286f19e7b6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json @@ -1,95 +1,111 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "64", "EventName": "PRNG_FUNCTIONS", "BriefDescription": "PRNG Functions", "PublicDescription": "Total number of the PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "65", "EventName": "PRNG_CYCLES", "BriefDescription": "PRNG Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "66", "EventName": "PRNG_BLOCKED_FUNCTIONS", "BriefDescription": "PRNG Blocked Functions", "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "67", "EventName": "PRNG_BLOCKED_CYCLES", "BriefDescription": "PRNG Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "68", "EventName": "SHA_FUNCTIONS", "BriefDescription": "SHA Functions", "PublicDescription": "Total number of SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "69", "EventName": "SHA_CYCLES", "BriefDescription": "SHA Cycles", "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "70", "EventName": "SHA_BLOCKED_FUNCTIONS", "BriefDescription": "SHA Blocked Functions", "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "71", "EventName": "SHA_BLOCKED_CYCLES", "BriefDescription": "SHA Bloced Cycles", "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "72", "EventName": "DEA_FUNCTIONS", "BriefDescription": "DEA Functions", "PublicDescription": "Total number of the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "73", "EventName": "DEA_CYCLES", "BriefDescription": "DEA Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "74", "EventName": "DEA_BLOCKED_FUNCTIONS", "BriefDescription": "DEA Blocked Functions", "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "75", "EventName": "DEA_BLOCKED_CYCLES", "BriefDescription": "DEA Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "76", "EventName": "AES_FUNCTIONS", "BriefDescription": "AES Functions", "PublicDescription": "Total number of AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "77", "EventName": "AES_CYCLES", "BriefDescription": "AES Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "78", "EventName": "AES_BLOCKED_FUNCTIONS", "BriefDescription": "AES Blocked Functions", "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "79", "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json index aa4dfb46b65b..e7a3524b748f 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json @@ -1,317 +1,370 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "128", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", "PublicDescription": "Counter:128 Name:L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" }, { + "Unit": "CPU-M-CF", "EventCode": "129", "EventName": "DTLB2_WRITES", "BriefDescription": "DTLB2 Writes", "PublicDescription": "A translation has been written into The Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache" }, { + "Unit": "CPU-M-CF", "EventCode": "130", "EventName": "DTLB2_MISSES", "BriefDescription": "DTLB2 Misses", "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle" }, { + "Unit": "CPU-M-CF", "EventCode": "131", "EventName": "DTLB2_HPAGE_WRITES", "BriefDescription": "DTLB2 One-Megabyte Page Writes", "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done" }, { + "Unit": "CPU-M-CF", "EventCode": "132", "EventName": "DTLB2_GPAGE_WRITES", "BriefDescription": "DTLB2 Two-Gigabyte Page Writes", "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB" }, { + "Unit": "CPU-M-CF", "EventCode": "133", "EventName": "L1D_L2D_SOURCED_WRITES", "BriefDescription": "L1D L2D Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache" }, { + "Unit": "CPU-M-CF", "EventCode": "134", "EventName": "ITLB2_WRITES", "BriefDescription": "ITLB2 Writes", "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache" }, { + "Unit": "CPU-M-CF", "EventCode": "135", "EventName": "ITLB2_MISSES", "BriefDescription": "ITLB2 Misses", "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle" }, { + "Unit": "CPU-M-CF", "EventCode": "136", "EventName": "L1I_L2I_SOURCED_WRITES", "BriefDescription": "L1I L2I Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache" }, { + "Unit": "CPU-M-CF", "EventCode": "137", "EventName": "TLB2_PTE_WRITES", "BriefDescription": "TLB2 PTE Writes", "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB" }, { + "Unit": "CPU-M-CF", "EventCode": "138", "EventName": "TLB2_CRSTE_WRITES", "BriefDescription": "TLB2 CRSTE Writes", "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB" }, { + "Unit": "CPU-M-CF", "EventCode": "139", "EventName": "TLB2_ENGINES_BUSY", "BriefDescription": "TLB2 Engines Busy", "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle" }, { + "Unit": "CPU-M-CF", "EventCode": "140", "EventName": "TX_C_TEND", "BriefDescription": "Completed TEND instructions in constrained TX mode", "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "141", "EventName": "TX_NC_TEND", "BriefDescription": "Completed TEND instructions in non-constrained TX mode", "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "143", "EventName": "L1C_TLB2_MISSES", "BriefDescription": "L1C TLB2 Misses", "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress" }, { + "Unit": "CPU-M-CF", "EventCode": "144", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "145", "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES", "BriefDescription": "L1D On-Chip Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory" }, { + "Unit": "CPU-M-CF", "EventCode": "146", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "147", "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Cluster L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache withountervention" }, { + "Unit": "CPU-M-CF", "EventCode": "148", "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES", "BriefDescription": "L1D On-Cluster Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster memory" }, { + "Unit": "CPU-M-CF", "EventCode": "149", "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "150", "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Cluster L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "151", "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES", "BriefDescription": "L1D Off-Cluster Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory" }, { + "Unit": "CPU-M-CF", "EventCode": "152", "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "153", "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "154", "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory" }, { + "Unit": "CPU-M-CF", "EventCode": "155", "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "156", "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES", "BriefDescription": "L1D On-Drawer L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "157", "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES", "BriefDescription": "L1D Off-Drawer L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "158", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO", "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line" }, { + "Unit": "CPU-M-CF", "EventCode": "162", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "163", "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES", "BriefDescription": "L1I On-Chip Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from On-Chip memory" }, { + "Unit": "CPU-M-CF", "EventCode": "164", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "165", "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Cluster L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "166", "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES", "BriefDescription": "L1I On-Cluster Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster memory" }, { + "Unit": "CPU-M-CF", "EventCode": "167", "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "168", "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Cluster L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "169", "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES", "BriefDescription": "L1I Off-Cluster Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory" }, { + "Unit": "CPU-M-CF", "EventCode": "170", "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "171", "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "172", "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory" }, { + "Unit": "CPU-M-CF", "EventCode": "173", "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "174", "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES", "BriefDescription": "L1I On-Drawer L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "175", "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES", "BriefDescription": "L1I Off-Drawer L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "224", "EventName": "BCD_DFP_EXECUTION_SLOTS", "BriefDescription": "BCD DFP Execution Slots", "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT" }, { + "Unit": "CPU-M-CF", "EventCode": "225", "EventName": "VX_BCD_EXECUTION_SLOTS", "BriefDescription": "VX BCD Execution Slots", "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMPVMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOPVCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVDVCVDG" }, { + "Unit": "CPU-M-CF", "EventCode": "226", "EventName": "DECIMAL_INSTRUCTIONS", "BriefDescription": "Decimal Instructions", "PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP" }, { + "Unit": "CPU-M-CF", "EventCode": "232", "EventName": "LAST_HOST_TRANSLATIONS", "BriefDescription": "Last host translation done", "PublicDescription": "Last Host Translation done" }, { + "Unit": "CPU-M-CF", "EventCode": "243", "EventName": "TX_NC_TABORT", "BriefDescription": "Aborted transactions in non-constrained TX mode", "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "244", "EventName": "TX_C_TABORT_NO_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete" }, { + "Unit": "CPU-M-CF", "EventCode": "245", "EventName": "TX_C_TABORT_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete" }, { + "Unit": "CPU-M-CF", "EventCode": "448", "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE", "BriefDescription": "Cycle count with one thread active", "PublicDescription": "Cycle count with one thread active" }, { + "Unit": "CPU-M-CF", "EventCode": "449", "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE", "BriefDescription": "Cycle count with two threads active", diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json new file mode 100644 index 000000000000..1a0034f79f73 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json @@ -0,0 +1,7 @@ +[ + { + "BriefDescription": "Transaction count", + "MetricName": "transaction", + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + } +] diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json index 8bf16759ca53..2dd8dafff2ef 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json @@ -1,71 +1,83 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "0", "EventName": "CPU_CYCLES", "BriefDescription": "CPU Cycles", "PublicDescription": "Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "1", "EventName": "INSTRUCTIONS", "BriefDescription": "Instructions", "PublicDescription": "Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "2", "EventName": "L1I_DIR_WRITES", "BriefDescription": "L1I Directory Writes", "PublicDescription": "Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "3", "EventName": "L1I_PENALTY_CYCLES", "BriefDescription": "L1I Penalty Cycles", "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "4", "EventName": "L1D_DIR_WRITES", "BriefDescription": "L1D Directory Writes", "PublicDescription": "Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "5", "EventName": "L1D_PENALTY_CYCLES", "BriefDescription": "L1D Penalty Cycles", "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "32", "EventName": "PROBLEM_STATE_CPU_CYCLES", "BriefDescription": "Problem-State CPU Cycles", "PublicDescription": "Problem-State Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "33", "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "34", "EventName": "PROBLEM_STATE_L1I_DIR_WRITES", "BriefDescription": "Problem-State L1I Directory Writes", "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "35", "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES", "BriefDescription": "Problem-State L1I Penalty Cycles", "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "36", "EventName": "PROBLEM_STATE_L1D_DIR_WRITES", "BriefDescription": "Problem-State L1D Directory Writes", "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "37", "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json index 7e5b72492141..db286f19e7b6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json @@ -1,95 +1,111 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "64", "EventName": "PRNG_FUNCTIONS", "BriefDescription": "PRNG Functions", "PublicDescription": "Total number of the PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "65", "EventName": "PRNG_CYCLES", "BriefDescription": "PRNG Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "66", "EventName": "PRNG_BLOCKED_FUNCTIONS", "BriefDescription": "PRNG Blocked Functions", "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "67", "EventName": "PRNG_BLOCKED_CYCLES", "BriefDescription": "PRNG Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "68", "EventName": "SHA_FUNCTIONS", "BriefDescription": "SHA Functions", "PublicDescription": "Total number of SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "69", "EventName": "SHA_CYCLES", "BriefDescription": "SHA Cycles", "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "70", "EventName": "SHA_BLOCKED_FUNCTIONS", "BriefDescription": "SHA Blocked Functions", "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "71", "EventName": "SHA_BLOCKED_CYCLES", "BriefDescription": "SHA Bloced Cycles", "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "72", "EventName": "DEA_FUNCTIONS", "BriefDescription": "DEA Functions", "PublicDescription": "Total number of the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "73", "EventName": "DEA_CYCLES", "BriefDescription": "DEA Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "74", "EventName": "DEA_BLOCKED_FUNCTIONS", "BriefDescription": "DEA Blocked Functions", "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "75", "EventName": "DEA_BLOCKED_CYCLES", "BriefDescription": "DEA Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "76", "EventName": "AES_FUNCTIONS", "BriefDescription": "AES Functions", "PublicDescription": "Total number of AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "77", "EventName": "AES_CYCLES", "BriefDescription": "AES Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "78", "EventName": "AES_BLOCKED_FUNCTIONS", "BriefDescription": "AES Blocked Functions", "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "79", "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json index b6d7fec7c2e7..b7b42a870bb0 100644 --- a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json @@ -1,143 +1,167 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "128", "EventName": "L1D_L2_SOURCED_WRITES", "BriefDescription": "L1D L2 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from the Level-2 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "129", "EventName": "L1I_L2_SOURCED_WRITES", "BriefDescription": "L1I L2 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "130", "EventName": "DTLB1_MISSES", "BriefDescription": "DTLB1 Misses", "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress." }, { + "Unit": "CPU-M-CF", "EventCode": "131", "EventName": "ITLB1_MISSES", "BriefDescription": "ITLB1 Misses", "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle a ITLB1 miss is in progress." }, { + "Unit": "CPU-M-CF", "EventCode": "133", "EventName": "L2C_STORES_SENT", "BriefDescription": "L2C Stores Sent", "PublicDescription": "Incremented by one for every store sent to Level-2 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "134", "EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Book L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "135", "EventName": "L1D_ONBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1D On-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "136", "EventName": "L1I_ONBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1I On-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "137", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", "PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" }, { + "Unit": "CPU-M-CF", "EventCode": "138", "EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1D Off-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "139", "EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1I Off-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "140", "EventName": "DTLB1_HPAGE_WRITES", "BriefDescription": "DTLB1 One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page" }, { + "Unit": "CPU-M-CF", "EventCode": "141", "EventName": "L1D_LMEM_SOURCED_WRITES", "BriefDescription": "L1D Local Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)" }, { + "Unit": "CPU-M-CF", "EventCode": "142", "EventName": "L1I_LMEM_SOURCED_WRITES", "BriefDescription": "L1I Local Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)" }, { + "Unit": "CPU-M-CF", "EventCode": "143", "EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Book L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "144", "EventName": "DTLB1_WRITES", "BriefDescription": "DTLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "145", "EventName": "ITLB1_WRITES", "BriefDescription": "ITLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "146", "EventName": "TLB2_PTE_WRITES", "BriefDescription": "TLB2 PTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "147", "EventName": "TLB2_CRSTE_HPAGE_WRITES", "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation" }, { + "Unit": "CPU-M-CF", "EventCode": "148", "EventName": "TLB2_CRSTE_WRITES", "BriefDescription": "TLB2 CRSTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "150", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "152", "EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "153", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "155", "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Chip L3 Sourced Writes", diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json index 8bf16759ca53..2dd8dafff2ef 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json @@ -1,71 +1,83 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "0", "EventName": "CPU_CYCLES", "BriefDescription": "CPU Cycles", "PublicDescription": "Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "1", "EventName": "INSTRUCTIONS", "BriefDescription": "Instructions", "PublicDescription": "Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "2", "EventName": "L1I_DIR_WRITES", "BriefDescription": "L1I Directory Writes", "PublicDescription": "Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "3", "EventName": "L1I_PENALTY_CYCLES", "BriefDescription": "L1I Penalty Cycles", "PublicDescription": "Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "4", "EventName": "L1D_DIR_WRITES", "BriefDescription": "L1D Directory Writes", "PublicDescription": "Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "5", "EventName": "L1D_PENALTY_CYCLES", "BriefDescription": "L1D Penalty Cycles", "PublicDescription": "Level-1 D-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "32", "EventName": "PROBLEM_STATE_CPU_CYCLES", "BriefDescription": "Problem-State CPU Cycles", "PublicDescription": "Problem-State Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "33", "EventName": "PROBLEM_STATE_INSTRUCTIONS", "BriefDescription": "Problem-State Instructions", "PublicDescription": "Problem-State Instruction Count" }, { + "Unit": "CPU-M-CF", "EventCode": "34", "EventName": "PROBLEM_STATE_L1I_DIR_WRITES", "BriefDescription": "Problem-State L1I Directory Writes", "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "35", "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES", "BriefDescription": "Problem-State L1I Penalty Cycles", "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count" }, { + "Unit": "CPU-M-CF", "EventCode": "36", "EventName": "PROBLEM_STATE_L1D_DIR_WRITES", "BriefDescription": "Problem-State L1D Directory Writes", "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count" }, { + "Unit": "CPU-M-CF", "EventCode": "37", "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES", "BriefDescription": "Problem-State L1D Penalty Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json index 7e5b72492141..db286f19e7b6 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json @@ -1,95 +1,111 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "64", "EventName": "PRNG_FUNCTIONS", "BriefDescription": "PRNG Functions", "PublicDescription": "Total number of the PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "65", "EventName": "PRNG_CYCLES", "BriefDescription": "PRNG Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "66", "EventName": "PRNG_BLOCKED_FUNCTIONS", "BriefDescription": "PRNG Blocked Functions", "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "67", "EventName": "PRNG_BLOCKED_CYCLES", "BriefDescription": "PRNG Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "68", "EventName": "SHA_FUNCTIONS", "BriefDescription": "SHA Functions", "PublicDescription": "Total number of SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "69", "EventName": "SHA_CYCLES", "BriefDescription": "SHA Cycles", "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "70", "EventName": "SHA_BLOCKED_FUNCTIONS", "BriefDescription": "SHA Blocked Functions", "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "71", "EventName": "SHA_BLOCKED_CYCLES", "BriefDescription": "SHA Bloced Cycles", "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "72", "EventName": "DEA_FUNCTIONS", "BriefDescription": "DEA Functions", "PublicDescription": "Total number of the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "73", "EventName": "DEA_CYCLES", "BriefDescription": "DEA Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "74", "EventName": "DEA_BLOCKED_FUNCTIONS", "BriefDescription": "DEA Blocked Functions", "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "75", "EventName": "DEA_BLOCKED_CYCLES", "BriefDescription": "DEA Blocked Cycles", "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "76", "EventName": "AES_FUNCTIONS", "BriefDescription": "AES Functions", "PublicDescription": "Total number of AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "77", "EventName": "AES_CYCLES", "BriefDescription": "AES Cycles", "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "78", "EventName": "AES_BLOCKED_FUNCTIONS", "BriefDescription": "AES Blocked Functions", "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU" }, { + "Unit": "CPU-M-CF", "EventCode": "79", "EventName": "AES_BLOCKED_CYCLES", "BriefDescription": "AES Blocked Cycles", diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json index 8682126aabb2..162251037219 100644 --- a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json @@ -1,209 +1,244 @@ [ { + "Unit": "CPU-M-CF", "EventCode": "128", "EventName": "DTLB1_MISSES", "BriefDescription": "DTLB1 Misses", "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress." }, { + "Unit": "CPU-M-CF", "EventCode": "129", "EventName": "ITLB1_MISSES", "BriefDescription": "ITLB1 Misses", "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle a ITLB1 miss is in progress." }, { + "Unit": "CPU-M-CF", "EventCode": "130", "EventName": "L1D_L2I_SOURCED_WRITES", "BriefDescription": "L1D L2I Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Instruction cache" }, { + "Unit": "CPU-M-CF", "EventCode": "131", "EventName": "L1I_L2I_SOURCED_WRITES", "BriefDescription": "L1I L2I Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache" }, { + "Unit": "CPU-M-CF", "EventCode": "132", "EventName": "L1D_L2D_SOURCED_WRITES", "BriefDescription": "L1D L2D Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache" }, { + "Unit": "CPU-M-CF", "EventCode": "133", "EventName": "DTLB1_WRITES", "BriefDescription": "DTLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "135", "EventName": "L1D_LMEM_SOURCED_WRITES", "BriefDescription": "L1D Local Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)" }, { + "Unit": "CPU-M-CF", "EventCode": "137", "EventName": "L1I_LMEM_SOURCED_WRITES", "BriefDescription": "L1I Local Memory Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)" }, { + "Unit": "CPU-M-CF", "EventCode": "138", "EventName": "L1D_RO_EXCL_WRITES", "BriefDescription": "L1D Read-only Exclusive Writes", "PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line" }, { + "Unit": "CPU-M-CF", "EventCode": "139", "EventName": "DTLB1_HPAGE_WRITES", "BriefDescription": "DTLB1 One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page" }, { + "Unit": "CPU-M-CF", "EventCode": "140", "EventName": "ITLB1_WRITES", "BriefDescription": "ITLB1 Writes", "PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer" }, { + "Unit": "CPU-M-CF", "EventCode": "141", "EventName": "TLB2_PTE_WRITES", "BriefDescription": "TLB2 PTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "142", "EventName": "TLB2_CRSTE_HPAGE_WRITES", "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation" }, { + "Unit": "CPU-M-CF", "EventCode": "143", "EventName": "TLB2_CRSTE_WRITES", "BriefDescription": "TLB2 CRSTE Writes", "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays" }, { + "Unit": "CPU-M-CF", "EventCode": "144", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1D On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "145", "EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "146", "EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES", "BriefDescription": "L1D Off-Book L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "147", "EventName": "L1D_ONBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1D On-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "148", "EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1D Off-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "149", "EventName": "TX_NC_TEND", "BriefDescription": "Completed TEND instructions in non-constrained TX mode", "PublicDescription": "A TEND instruction has completed in a nonconstrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "150", "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from a On Chip Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "151", "EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D Off-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "152", "EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES_IV", "BriefDescription": "L1D Off-Book L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "153", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I On-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "154", "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Chip L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "155", "EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES", "BriefDescription": "L1I Off-Book L3 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "156", "EventName": "L1I_ONBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1I On-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "157", "EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES", "BriefDescription": "L1I Off-Book L4 Sourced Writes", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-4 cache" }, { + "Unit": "CPU-M-CF", "EventCode": "158", "EventName": "TX_C_TEND", "BriefDescription": "Completed TEND instructions in constrained TX mode", "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "159", "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "160", "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I Off-Chip L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "161", "EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES_IV", "BriefDescription": "L1I Off-Book L3 Sourced Writes with Intervention", "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention" }, { + "Unit": "CPU-M-CF", "EventCode": "177", "EventName": "TX_NC_TABORT", "BriefDescription": "Aborted transactions in non-constrained TX mode", "PublicDescription": "A transaction abort has occurred in a nonconstrained transactional-execution mode" }, { + "Unit": "CPU-M-CF", "EventCode": "178", "EventName": "TX_C_TABORT_NO_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic", "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete" }, { + "Unit": "CPU-M-CF", "EventCode": "179", "EventName": "TX_C_TABORT_SPECIAL", "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic", diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json b/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json new file mode 100644 index 000000000000..1a0034f79f73 --- /dev/null +++ b/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json @@ -0,0 +1,7 @@ +[ + { + "BriefDescription": "Transaction count", + "MetricName": "transaction", + "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL" + } +] diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json index d40498f2cb1e..635c09fda1d9 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json @@ -188,7 +188,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -199,7 +199,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -210,7 +210,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -221,7 +221,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -232,7 +232,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -243,7 +243,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -254,7 +254,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -265,7 +265,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json index 16034bfd06dd..8755693d86c6 100644 --- a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json +++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json @@ -187,7 +187,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES", - "Filter": "filter_band0=1200", + "Filter": "filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -198,7 +198,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES", - "Filter": "filter_band1=2000", + "Filter": "filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -209,7 +209,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES", - "Filter": "filter_band2=3000", + "Filter": "filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -220,7 +220,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES", - "Filter": "filter_band3=4000", + "Filter": "filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", @@ -231,7 +231,7 @@ "Counter": "0,1,2,3", "EventCode": "0xb", "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band0=1200", + "Filter": "edge=1,filter_band0=12", "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_1200mhz_cycles %", "PerPkg": "1", @@ -242,7 +242,7 @@ "Counter": "0,1,2,3", "EventCode": "0xc", "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band1=2000", + "Filter": "edge=1,filter_band1=20", "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_2000mhz_cycles %", "PerPkg": "1", @@ -253,7 +253,7 @@ "Counter": "0,1,2,3", "EventCode": "0xd", "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band2=4000", + "Filter": "edge=1,filter_band2=30", "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_3000mhz_cycles %", "PerPkg": "1", @@ -264,7 +264,7 @@ "Counter": "0,1,2,3", "EventCode": "0xe", "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS", - "Filter": "edge=1,filter_band3=4000", + "Filter": "edge=1,filter_band3=40", "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.", "MetricName": "freq_ge_4000mhz_cycles %", "PerPkg": "1", diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index db3a594ee1e4..68c92bb599ee 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -233,6 +233,8 @@ static struct map { { "QPI LL", "uncore_qpi" }, { "SBO", "uncore_sbox" }, { "iMPH-U", "uncore_arb" }, + { "CPU-M-CF", "cpum_cf" }, + { "CPU-M-SF", "cpum_sf" }, {} }; diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py index 38dfb720fb6f..54ace2f6bc36 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py @@ -31,10 +31,8 @@ def flag_str(event_name, field_name, value): string = "" if flag_fields[event_name][field_name]: - print_delim = 0 - keys = flag_fields[event_name][field_name]['values'].keys() - keys.sort() - for idx in keys: + print_delim = 0 + for idx in sorted(flag_fields[event_name][field_name]['values']): if not value and not idx: string += flag_fields[event_name][field_name]['values'][idx] break @@ -51,14 +49,12 @@ def symbol_str(event_name, field_name, value): string = "" if symbolic_fields[event_name][field_name]: - keys = symbolic_fields[event_name][field_name]['values'].keys() - keys.sort() - for idx in keys: + for idx in sorted(symbolic_fields[event_name][field_name]['values']): if not value and not idx: - string = symbolic_fields[event_name][field_name]['values'][idx] + string = symbolic_fields[event_name][field_name]['values'][idx] break - if (value == idx): - string = symbolic_fields[event_name][field_name]['values'][idx] + if (value == idx): + string = symbolic_fields[event_name][field_name]['values'][idx] break return string @@ -74,19 +70,17 @@ def trace_flag_str(value): string = "" print_delim = 0 - keys = trace_flags.keys() - - for idx in keys: - if not value and not idx: - string += "NONE" - break - - if idx and (value & idx) == idx: - if print_delim: - string += " | "; - string += trace_flags[idx] - print_delim = 1 - value &= ~idx + for idx in trace_flags: + if not value and not idx: + string += "NONE" + break + + if idx and (value & idx) == idx: + if print_delim: + string += " | "; + string += trace_flags[idx] + print_delim = 1 + value &= ~idx return string diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py index 81a56cd2b3c1..21a7a1298094 100755 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -8,6 +8,7 @@ # PerfEvent is the base class for all perf event sample, PebsEvent # is a HW base Intel x86 PEBS event, and user could add more SW/HW # event classes based on requirements. +from __future__ import print_function import struct @@ -44,7 +45,8 @@ class PerfEvent(object): PerfEvent.event_num += 1 def show(self): - print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % + (self.name, self.symbol, self.comm, self.dso)) # # Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py index fdd92f699055..cac7b2542ee8 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py @@ -11,7 +11,7 @@ try: import wx except ImportError: - raise ImportError, "You need to install the wxpython lib for this script" + raise ImportError("You need to install the wxpython lib for this script") class RootFrame(wx.Frame): diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py index f6c84966e4f8..7384dcb628c4 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py @@ -5,6 +5,7 @@ # This software may be distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software # Foundation. +from __future__ import print_function import errno, os @@ -33,7 +34,7 @@ def nsecs_str(nsecs): return str def add_stats(dict, key, value): - if not dict.has_key(key): + if key not in dict: dict[key] = (value, value, value, 1) else: min, max, avg, count = dict[key] @@ -72,10 +73,10 @@ try: except: if not audit_package_warned: audit_package_warned = True - print "Install the audit-libs-python package to get syscall names.\n" \ - "For example:\n # apt-get install python-audit (Ubuntu)" \ - "\n # yum install audit-libs-python (Fedora)" \ - "\n etc.\n" + print("Install the audit-libs-python package to get syscall names.\n" + "For example:\n # apt-get install python-audit (Ubuntu)" + "\n # yum install audit-libs-python (Fedora)" + "\n etc.\n") def syscall_name(id): try: diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-record b/tools/perf/scripts/python/bin/powerpc-hcalls-record new file mode 100644 index 000000000000..b7402aa9147d --- /dev/null +++ b/tools/perf/scripts/python/bin/powerpc-hcalls-record @@ -0,0 +1,2 @@ +#!/bin/bash +perf record -e "{powerpc:hcall_entry,powerpc:hcall_exit}" $@ diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-report b/tools/perf/scripts/python/bin/powerpc-hcalls-report new file mode 100644 index 000000000000..dd32ad7465f6 --- /dev/null +++ b/tools/perf/scripts/python/bin/powerpc-hcalls-report @@ -0,0 +1,2 @@ +#!/bin/bash +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/powerpc-hcalls.py diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py deleted file mode 100644 index b494a67a1c67..000000000000 --- a/tools/perf/scripts/python/call-graph-from-sql.py +++ /dev/null @@ -1,339 +0,0 @@ -#!/usr/bin/python2 -# call-graph-from-sql.py: create call-graph from sql database -# Copyright (c) 2014-2017, Intel Corporation. -# -# This program is free software; you can redistribute it and/or modify it -# under the terms and conditions of the GNU General Public License, -# version 2, as published by the Free Software Foundation. -# -# This program is distributed in the hope it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for -# more details. - -# To use this script you will need to have exported data using either the -# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those -# scripts for details. -# -# Following on from the example in the export scripts, a -# call-graph can be displayed for the pt_example database like this: -# -# python tools/perf/scripts/python/call-graph-from-sql.py pt_example -# -# Note that for PostgreSQL, this script supports connecting to remote databases -# by setting hostname, port, username, password, and dbname e.g. -# -# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" -# -# The result is a GUI window with a tree representing a context-sensitive -# call-graph. Expanding a couple of levels of the tree and adjusting column -# widths to suit will display something like: -# -# Call Graph: pt_example -# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) -# v- ls -# v- 2638:2638 -# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 -# |- unknown unknown 1 13198 0.1 1 0.0 -# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 -# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 -# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 -# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 -# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 -# >- __libc_csu_init ls 1 10354 0.1 10 0.0 -# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 -# v- main ls 1 8182043 99.6 180254 99.9 -# -# Points to note: -# The top level is a command name (comm) -# The next level is a thread (pid:tid) -# Subsequent levels are functions -# 'Count' is the number of calls -# 'Time' is the elapsed time until the function returns -# Percentages are relative to the level above -# 'Branch Count' is the total number of branches for that function and all -# functions that it calls - -import sys -from PySide.QtCore import * -from PySide.QtGui import * -from PySide.QtSql import * -from decimal import * - -class TreeItem(): - - def __init__(self, db, row, parent_item): - self.db = db - self.row = row - self.parent_item = parent_item - self.query_done = False; - self.child_count = 0 - self.child_items = [] - self.data = ["", "", "", "", "", "", ""] - self.comm_id = 0 - self.thread_id = 0 - self.call_path_id = 1 - self.branch_count = 0 - self.time = 0 - if not parent_item: - self.setUpRoot() - - def setUpRoot(self): - self.query_done = True - query = QSqlQuery(self.db) - ret = query.exec_('SELECT id, comm FROM comms') - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - while query.next(): - if not query.value(0): - continue - child_item = TreeItem(self.db, self.child_count, self) - self.child_items.append(child_item) - self.child_count += 1 - child_item.setUpLevel1(query.value(0), query.value(1)) - - def setUpLevel1(self, comm_id, comm): - self.query_done = True; - self.comm_id = comm_id - self.data[0] = comm - self.child_items = [] - self.child_count = 0 - query = QSqlQuery(self.db) - ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - while query.next(): - child_item = TreeItem(self.db, self.child_count, self) - self.child_items.append(child_item) - self.child_count += 1 - child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2)) - - def setUpLevel2(self, comm_id, thread_id, pid, tid): - self.comm_id = comm_id - self.thread_id = thread_id - self.data[0] = str(pid) + ":" + str(tid) - - def getChildItem(self, row): - return self.child_items[row] - - def getParentItem(self): - return self.parent_item - - def getRow(self): - return self.row - - def timePercent(self, b): - if not self.time: - return "0.0" - x = (b * Decimal(100)) / self.time - return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) - - def branchPercent(self, b): - if not self.branch_count: - return "0.0" - x = (b * Decimal(100)) / self.branch_count - return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) - - def addChild(self, call_path_id, name, dso, count, time, branch_count): - child_item = TreeItem(self.db, self.child_count, self) - child_item.comm_id = self.comm_id - child_item.thread_id = self.thread_id - child_item.call_path_id = call_path_id - child_item.branch_count = branch_count - child_item.time = time - child_item.data[0] = name - if dso == "[kernel.kallsyms]": - dso = "[kernel]" - child_item.data[1] = dso - child_item.data[2] = str(count) - child_item.data[3] = str(time) - child_item.data[4] = self.timePercent(time) - child_item.data[5] = str(branch_count) - child_item.data[6] = self.branchPercent(branch_count) - self.child_items.append(child_item) - self.child_count += 1 - - def selectCalls(self): - self.query_done = True; - query = QSqlQuery(self.db) - ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, ' - '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), ' - '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' - '( SELECT ip FROM call_paths where id = call_path_id ) ' - 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + - ' ORDER BY call_path_id') - if not ret: - raise Exception("Query failed: " + query.lastError().text()) - last_call_path_id = 0 - name = "" - dso = "" - count = 0 - branch_count = 0 - total_branch_count = 0 - time = 0 - total_time = 0 - while query.next(): - if query.value(1) == last_call_path_id: - count += 1 - branch_count += query.value(2) - time += query.value(4) - query.value(3) - else: - if count: - self.addChild(last_call_path_id, name, dso, count, time, branch_count) - last_call_path_id = query.value(1) - name = query.value(5) - dso = query.value(6) - count = 1 - total_branch_count += branch_count - total_time += time - branch_count = query.value(2) - time = query.value(4) - query.value(3) - if count: - self.addChild(last_call_path_id, name, dso, count, time, branch_count) - total_branch_count += branch_count - total_time += time - # Top level does not have time or branch count, so fix that here - if total_branch_count > self.branch_count: - self.branch_count = total_branch_count - if self.branch_count: - for child_item in self.child_items: - child_item.data[6] = self.branchPercent(child_item.branch_count) - if total_time > self.time: - self.time = total_time - if self.time: - for child_item in self.child_items: - child_item.data[4] = self.timePercent(child_item.time) - - def childCount(self): - if not self.query_done: - self.selectCalls() - return self.child_count - - def columnCount(self): - return 7 - - def columnHeader(self, column): - headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] - return headers[column] - - def getData(self, column): - return self.data[column] - -class TreeModel(QAbstractItemModel): - - def __init__(self, db, parent=None): - super(TreeModel, self).__init__(parent) - self.db = db - self.root = TreeItem(db, 0, None) - - def columnCount(self, parent): - return self.root.columnCount() - - def rowCount(self, parent): - if parent.isValid(): - parent_item = parent.internalPointer() - else: - parent_item = self.root - return parent_item.childCount() - - def headerData(self, section, orientation, role): - if role == Qt.TextAlignmentRole: - if section > 1: - return Qt.AlignRight - if role != Qt.DisplayRole: - return None - if orientation != Qt.Horizontal: - return None - return self.root.columnHeader(section) - - def parent(self, child): - child_item = child.internalPointer() - if child_item is self.root: - return QModelIndex() - parent_item = child_item.getParentItem() - return self.createIndex(parent_item.getRow(), 0, parent_item) - - def index(self, row, column, parent): - if parent.isValid(): - parent_item = parent.internalPointer() - else: - parent_item = self.root - child_item = parent_item.getChildItem(row) - return self.createIndex(row, column, child_item) - - def data(self, index, role): - if role == Qt.TextAlignmentRole: - if index.column() > 1: - return Qt.AlignRight - if role != Qt.DisplayRole: - return None - index_item = index.internalPointer() - return index_item.getData(index.column()) - -class MainWindow(QMainWindow): - - def __init__(self, db, dbname, parent=None): - super(MainWindow, self).__init__(parent) - - self.setObjectName("MainWindow") - self.setWindowTitle("Call Graph: " + dbname) - self.move(100, 100) - self.resize(800, 600) - style = self.style() - icon = style.standardIcon(QStyle.SP_MessageBoxInformation) - self.setWindowIcon(icon); - - self.model = TreeModel(db) - - self.view = QTreeView() - self.view.setModel(self.model) - - self.setCentralWidget(self.view) - -if __name__ == '__main__': - if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>" - raise Exception("Too few arguments") - - dbname = sys.argv[1] - - is_sqlite3 = False - try: - f = open(dbname) - if f.read(15) == "SQLite format 3": - is_sqlite3 = True - f.close() - except: - pass - - if is_sqlite3: - db = QSqlDatabase.addDatabase('QSQLITE') - else: - db = QSqlDatabase.addDatabase('QPSQL') - opts = dbname.split() - for opt in opts: - if '=' in opt: - opt = opt.split('=') - if opt[0] == 'hostname': - db.setHostName(opt[1]) - elif opt[0] == 'port': - db.setPort(int(opt[1])) - elif opt[0] == 'username': - db.setUserName(opt[1]) - elif opt[0] == 'password': - db.setPassword(opt[1]) - elif opt[0] == 'dbname': - dbname = opt[1] - else: - dbname = opt - - db.setDatabaseName(dbname) - if not db.open(): - raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) - - app = QApplication(sys.argv) - window = MainWindow(db, dbname) - window.show() - err = app.exec_() - db.close() - sys.exit(err) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index efcaf6cac2eb..0564dd7377f2 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -59,7 +59,7 @@ import datetime # pt_example=# \q # # An example of using the database is provided by the script -# call-graph-from-sql.py. Refer to that script for details. +# exported-sql-viewer.py. Refer to that script for details. # # Tables: # @@ -204,14 +204,23 @@ from ctypes import * libpq = CDLL("libpq.so.5") PQconnectdb = libpq.PQconnectdb PQconnectdb.restype = c_void_p +PQconnectdb.argtypes = [ c_char_p ] PQfinish = libpq.PQfinish +PQfinish.argtypes = [ c_void_p ] PQstatus = libpq.PQstatus +PQstatus.restype = c_int +PQstatus.argtypes = [ c_void_p ] PQexec = libpq.PQexec PQexec.restype = c_void_p +PQexec.argtypes = [ c_void_p, c_char_p ] PQresultStatus = libpq.PQresultStatus +PQresultStatus.restype = c_int +PQresultStatus.argtypes = [ c_void_p ] PQputCopyData = libpq.PQputCopyData +PQputCopyData.restype = c_int PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ] PQputCopyEnd = libpq.PQputCopyEnd +PQputCopyEnd.restype = c_int PQputCopyEnd.argtypes = [ c_void_p, c_void_p ] sys.path.append(os.environ['PERF_EXEC_PATH'] + \ diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index f827bf77e9d2..245caf2643ed 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -40,7 +40,7 @@ import datetime # sqlite> .quit # # An example of using the database is provided by the script -# call-graph-from-sql.py. Refer to that script for details. +# exported-sql-viewer.py. Refer to that script for details. # # The database structure is practically the same as created by the script # export-to-postgresql.py. Refer to that script for details. A notable @@ -440,7 +440,11 @@ def branch_type_table(*x): def sample_table(*x): if branches: - bind_exec(sample_query, 18, x) + for xx in x[0:15]: + sample_query.addBindValue(str(xx)) + for xx in x[19:22]: + sample_query.addBindValue(str(xx)) + do_query_(sample_query) else: bind_exec(sample_query, 22, x) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py new file mode 100755 index 000000000000..f278ce5ebab7 --- /dev/null +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -0,0 +1,2615 @@ +#!/usr/bin/python2 +# SPDX-License-Identifier: GPL-2.0 +# exported-sql-viewer.py: view data from sql database +# Copyright (c) 2014-2018, Intel Corporation. + +# To use this script you will need to have exported data using either the +# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those +# scripts for details. +# +# Following on from the example in the export scripts, a +# call-graph can be displayed for the pt_example database like this: +# +# python tools/perf/scripts/python/exported-sql-viewer.py pt_example +# +# Note that for PostgreSQL, this script supports connecting to remote databases +# by setting hostname, port, username, password, and dbname e.g. +# +# python tools/perf/scripts/python/exported-sql-viewer.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" +# +# The result is a GUI window with a tree representing a context-sensitive +# call-graph. Expanding a couple of levels of the tree and adjusting column +# widths to suit will display something like: +# +# Call Graph: pt_example +# Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +# v- ls +# v- 2638:2638 +# v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 +# |- unknown unknown 1 13198 0.1 1 0.0 +# >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 +# >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 +# v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 +# >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 +# >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 +# >- __libc_csu_init ls 1 10354 0.1 10 0.0 +# |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 +# v- main ls 1 8182043 99.6 180254 99.9 +# +# Points to note: +# The top level is a command name (comm) +# The next level is a thread (pid:tid) +# Subsequent levels are functions +# 'Count' is the number of calls +# 'Time' is the elapsed time until the function returns +# Percentages are relative to the level above +# 'Branch Count' is the total number of branches for that function and all +# functions that it calls + +# There is also a "All branches" report, which displays branches and +# possibly disassembly. However, presently, the only supported disassembler is +# Intel XED, and additionally the object code must be present in perf build ID +# cache. To use Intel XED, libxed.so must be present. To build and install +# libxed.so: +# git clone https://github.com/intelxed/mbuild.git mbuild +# git clone https://github.com/intelxed/xed +# cd xed +# ./mfile.py --share +# sudo ./mfile.py --prefix=/usr/local install +# sudo ldconfig +# +# Example report: +# +# Time CPU Command PID TID Branch Type In Tx Branch +# 8107675239590 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so) +# 7fab593ea260 48 89 e7 mov %rsp, %rdi +# 8107675239899 2 ls 22011 22011 hardware interrupt No 7fab593ea260 _start (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +# 8107675241900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea260 _start (ld-2.19.so) +# 7fab593ea260 48 89 e7 mov %rsp, %rdi +# 7fab593ea263 e8 c8 06 00 00 callq 0x7fab593ea930 +# 8107675241900 2 ls 22011 22011 call No 7fab593ea263 _start+0x3 (ld-2.19.so) -> 7fab593ea930 _dl_start (ld-2.19.so) +# 7fab593ea930 55 pushq %rbp +# 7fab593ea931 48 89 e5 mov %rsp, %rbp +# 7fab593ea934 41 57 pushq %r15 +# 7fab593ea936 41 56 pushq %r14 +# 7fab593ea938 41 55 pushq %r13 +# 7fab593ea93a 41 54 pushq %r12 +# 7fab593ea93c 53 pushq %rbx +# 7fab593ea93d 48 89 fb mov %rdi, %rbx +# 7fab593ea940 48 83 ec 68 sub $0x68, %rsp +# 7fab593ea944 0f 31 rdtsc +# 7fab593ea946 48 c1 e2 20 shl $0x20, %rdx +# 7fab593ea94a 89 c0 mov %eax, %eax +# 7fab593ea94c 48 09 c2 or %rax, %rdx +# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax +# 8107675242232 2 ls 22011 22011 hardware interrupt No 7fab593ea94f _dl_start+0x1f (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +# 8107675242900 2 ls 22011 22011 return from interrupt No ffffffff86a00a67 native_irq_return_iret ([kernel]) -> 7fab593ea94f _dl_start+0x1f (ld-2.19.so) +# 7fab593ea94f 48 8b 05 1a 15 22 00 movq 0x22151a(%rip), %rax +# 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip) +# 8107675243232 2 ls 22011 22011 hardware interrupt No 7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) + +import sys +import weakref +import threading +import string +import cPickle +import re +import os +from PySide.QtCore import * +from PySide.QtGui import * +from PySide.QtSql import * +from decimal import * +from ctypes import * +from multiprocessing import Process, Array, Value, Event + +# Data formatting helpers + +def tohex(ip): + if ip < 0: + ip += 1 << 64 + return "%x" % ip + +def offstr(offset): + if offset: + return "+0x%x" % offset + return "" + +def dsoname(name): + if name == "[kernel.kallsyms]": + return "[kernel]" + return name + +def findnth(s, sub, n, offs=0): + pos = s.find(sub) + if pos < 0: + return pos + if n <= 1: + return offs + pos + return findnth(s[pos + 1:], sub, n - 1, offs + pos + 1) + +# Percent to one decimal place + +def PercentToOneDP(n, d): + if not d: + return "0.0" + x = (n * Decimal(100)) / d + return str(x.quantize(Decimal(".1"), rounding=ROUND_HALF_UP)) + +# Helper for queries that must not fail + +def QueryExec(query, stmt): + ret = query.exec_(stmt) + if not ret: + raise Exception("Query failed: " + query.lastError().text()) + +# Background thread + +class Thread(QThread): + + done = Signal(object) + + def __init__(self, task, param=None, parent=None): + super(Thread, self).__init__(parent) + self.task = task + self.param = param + + def run(self): + while True: + if self.param is None: + done, result = self.task() + else: + done, result = self.task(self.param) + self.done.emit(result) + if done: + break + +# Tree data model + +class TreeModel(QAbstractItemModel): + + def __init__(self, root, parent=None): + super(TreeModel, self).__init__(parent) + self.root = root + self.last_row_read = 0 + + def Item(self, parent): + if parent.isValid(): + return parent.internalPointer() + else: + return self.root + + def rowCount(self, parent): + result = self.Item(parent).childCount() + if result < 0: + result = 0 + self.dataChanged.emit(parent, parent) + return result + + def hasChildren(self, parent): + return self.Item(parent).hasChildren() + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(section) + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def parent(self, child): + child_item = child.internalPointer() + if child_item is self.root: + return QModelIndex() + parent_item = child_item.getParentItem() + return self.createIndex(parent_item.getRow(), 0, parent_item) + + def index(self, row, column, parent): + child_item = self.Item(parent).getChildItem(row) + return self.createIndex(row, column, child_item) + + def DisplayData(self, item, index): + return item.getData(index.column()) + + def FetchIfNeeded(self, row): + if row > self.last_row_read: + self.last_row_read = row + if row + 10 >= self.root.child_count: + self.fetcher.Fetch(glb_chunk_sz) + + def columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) + if role != Qt.DisplayRole: + return None + item = index.internalPointer() + return self.DisplayData(item, index) + +# Table data model + +class TableModel(QAbstractTableModel): + + def __init__(self, parent=None): + super(TableModel, self).__init__(parent) + self.child_count = 0 + self.child_items = [] + self.last_row_read = 0 + + def Item(self, parent): + if parent.isValid(): + return parent.internalPointer() + else: + return self + + def rowCount(self, parent): + return self.child_count + + def headerData(self, section, orientation, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(section) + if role != Qt.DisplayRole: + return None + if orientation != Qt.Horizontal: + return None + return self.columnHeader(section) + + def index(self, row, column, parent): + return self.createIndex(row, column, self.child_items[row]) + + def DisplayData(self, item, index): + return item.getData(index.column()) + + def FetchIfNeeded(self, row): + if row > self.last_row_read: + self.last_row_read = row + if row + 10 >= self.child_count: + self.fetcher.Fetch(glb_chunk_sz) + + def columnAlignment(self, column): + return Qt.AlignLeft + + def columnFont(self, column): + return None + + def data(self, index, role): + if role == Qt.TextAlignmentRole: + return self.columnAlignment(index.column()) + if role == Qt.FontRole: + return self.columnFont(index.column()) + if role != Qt.DisplayRole: + return None + item = index.internalPointer() + return self.DisplayData(item, index) + +# Model cache + +model_cache = weakref.WeakValueDictionary() +model_cache_lock = threading.Lock() + +def LookupCreateModel(model_name, create_fn): + model_cache_lock.acquire() + try: + model = model_cache[model_name] + except: + model = None + if model is None: + model = create_fn() + model_cache[model_name] = model + model_cache_lock.release() + return model + +# Find bar + +class FindBar(): + + def __init__(self, parent, finder, is_reg_expr=False): + self.finder = finder + self.context = [] + self.last_value = None + self.last_pattern = None + + label = QLabel("Find:") + label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.textbox = QComboBox() + self.textbox.setEditable(True) + self.textbox.currentIndexChanged.connect(self.ValueChanged) + + self.progress = QProgressBar() + self.progress.setRange(0, 0) + self.progress.hide() + + if is_reg_expr: + self.pattern = QCheckBox("Regular Expression") + else: + self.pattern = QCheckBox("Pattern") + self.pattern.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.next_button = QToolButton() + self.next_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowDown)) + self.next_button.released.connect(lambda: self.NextPrev(1)) + + self.prev_button = QToolButton() + self.prev_button.setIcon(parent.style().standardIcon(QStyle.SP_ArrowUp)) + self.prev_button.released.connect(lambda: self.NextPrev(-1)) + + self.close_button = QToolButton() + self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton)) + self.close_button.released.connect(self.Deactivate) + + self.hbox = QHBoxLayout() + self.hbox.setContentsMargins(0, 0, 0, 0) + + self.hbox.addWidget(label) + self.hbox.addWidget(self.textbox) + self.hbox.addWidget(self.progress) + self.hbox.addWidget(self.pattern) + self.hbox.addWidget(self.next_button) + self.hbox.addWidget(self.prev_button) + self.hbox.addWidget(self.close_button) + + self.bar = QWidget() + self.bar.setLayout(self.hbox); + self.bar.hide() + + def Widget(self): + return self.bar + + def Activate(self): + self.bar.show() + self.textbox.setFocus() + + def Deactivate(self): + self.bar.hide() + + def Busy(self): + self.textbox.setEnabled(False) + self.pattern.hide() + self.next_button.hide() + self.prev_button.hide() + self.progress.show() + + def Idle(self): + self.textbox.setEnabled(True) + self.progress.hide() + self.pattern.show() + self.next_button.show() + self.prev_button.show() + + def Find(self, direction): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + self.last_value = value + self.last_pattern = pattern + self.finder.Find(value, direction, pattern, self.context) + + def ValueChanged(self): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + index = self.textbox.currentIndex() + data = self.textbox.itemData(index) + # Store the pattern in the combo box to keep it with the text value + if data == None: + self.textbox.setItemData(index, pattern) + else: + self.pattern.setChecked(data) + self.Find(0) + + def NextPrev(self, direction): + value = self.textbox.currentText() + pattern = self.pattern.isChecked() + if value != self.last_value: + index = self.textbox.findText(value) + # Allow for a button press before the value has been added to the combo box + if index < 0: + index = self.textbox.count() + self.textbox.addItem(value, pattern) + self.textbox.setCurrentIndex(index) + return + else: + self.textbox.setItemData(index, pattern) + elif pattern != self.last_pattern: + # Keep the pattern recorded in the combo box up to date + index = self.textbox.currentIndex() + self.textbox.setItemData(index, pattern) + self.Find(direction) + + def NotFound(self): + QMessageBox.information(self.bar, "Find", "'" + self.textbox.currentText() + "' not found") + +# Context-sensitive call graph data model item base + +class CallGraphLevelItemBase(object): + + def __init__(self, glb, row, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.query_done = False; + self.child_count = 0 + self.child_items = [] + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + if not self.query_done: + self.Select() + if not self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Context-sensitive call graph data model level 2+ item base + +class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): + super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) + self.comm_id = comm_id + self.thread_id = thread_id + self.call_path_id = call_path_id + self.branch_count = branch_count + self.time = time + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " INNER JOIN dsos ON symbols.dso_id = dsos.id" + " WHERE parent_call_path_id = " + str(self.call_path_id) + + " AND comm_id = " + str(self.comm_id) + + " AND thread_id = " + str(self.thread_id) + + " GROUP BY call_path_id, name, short_name" + " ORDER BY call_path_id") + while query.next(): + child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model level three item + +class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): + super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) + dso = dsoname(dso) + self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] + self.dbid = call_path_id + +# Context-sensitive call graph data model level two item + +class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): + + def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): + super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) + self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] + self.dbid = thread_id + + def Select(self): + super(CallGraphLevelTwoItem, self).Select() + for child_item in self.child_items: + self.time += child_item.time + self.branch_count += child_item.branch_count + for child_item in self.child_items: + child_item.data[4] = PercentToOneDP(child_item.time, self.time) + child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) + +# Context-sensitive call graph data model level one item + +class CallGraphLevelOneItem(CallGraphLevelItemBase): + + def __init__(self, glb, row, comm_id, comm, parent_item): + super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) + self.data = [comm, "", "", "", "", "", ""] + self.dbid = comm_id + + def Select(self): + self.query_done = True; + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT thread_id, pid, tid" + " FROM comm_threads" + " INNER JOIN threads ON thread_id = threads.id" + " WHERE comm_id = " + str(self.dbid)) + while query.next(): + child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model root item + +class CallGraphRootItem(CallGraphLevelItemBase): + + def __init__(self, glb): + super(CallGraphRootItem, self).__init__(glb, 0, None) + self.dbid = 0 + self.query_done = True; + query = QSqlQuery(glb.db) + QueryExec(query, "SELECT id, comm FROM comms") + while query.next(): + if not query.value(0): + continue + child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) + self.child_items.append(child_item) + self.child_count += 1 + +# Context-sensitive call graph data model + +class CallGraphModel(TreeModel): + + def __init__(self, glb, parent=None): + super(CallGraphModel, self).__init__(CallGraphRootItem(glb), parent) + self.glb = glb + + def columnCount(self, parent=None): + return 7 + + def columnHeader(self, column): + headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] + return headers[column] + + def columnAlignment(self, column): + alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] + return alignment[column] + + def FindSelect(self, value, pattern, query): + if pattern: + # postgresql and sqlite pattern patching differences: + # postgresql LIKE is case sensitive but sqlite LIKE is not + # postgresql LIKE allows % and _ to be escaped with \ but sqlite LIKE does not + # postgresql supports ILIKE which is case insensitive + # sqlite supports GLOB (text only) which uses * and ? and is case sensitive + if not self.glb.dbref.is_sqlite3: + # Escape % and _ + s = value.replace("%", "\%") + s = s.replace("_", "\_") + # Translate * and ? into SQL LIKE pattern characters % and _ + trans = string.maketrans("*?", "%_") + match = " LIKE '" + str(s).translate(trans) + "'" + else: + match = " GLOB '" + str(value) + "'" + else: + match = " = '" + str(value) + "'" + QueryExec(query, "SELECT call_path_id, comm_id, thread_id" + " FROM calls" + " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" + " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" + " WHERE symbols.name" + match + + " GROUP BY comm_id, thread_id, call_path_id" + " ORDER BY comm_id, thread_id, call_path_id") + + def FindPath(self, query): + # Turn the query result into a list of ids that the tree view can walk + # to open the tree at the right place. + ids = [] + parent_id = query.value(0) + while parent_id: + ids.insert(0, parent_id) + q2 = QSqlQuery(self.glb.db) + QueryExec(q2, "SELECT parent_id" + " FROM call_paths" + " WHERE id = " + str(parent_id)) + if not q2.next(): + break + parent_id = q2.value(0) + # The call path root is not used + if ids[0] == 1: + del ids[0] + ids.insert(0, query.value(2)) + ids.insert(0, query.value(1)) + return ids + + def Found(self, query, found): + if found: + return self.FindPath(query) + return [] + + def FindValue(self, value, pattern, query, last_value, last_pattern): + if last_value == value and pattern == last_pattern: + found = query.first() + else: + self.FindSelect(value, pattern, query) + found = query.next() + return self.Found(query, found) + + def FindNext(self, query): + found = query.next() + if not found: + found = query.first() + return self.Found(query, found) + + def FindPrev(self, query): + found = query.previous() + if not found: + found = query.last() + return self.Found(query, found) + + def FindThread(self, c): + if c.direction == 0 or c.value != c.last_value or c.pattern != c.last_pattern: + ids = self.FindValue(c.value, c.pattern, c.query, c.last_value, c.last_pattern) + elif c.direction > 0: + ids = self.FindNext(c.query) + else: + ids = self.FindPrev(c.query) + return (True, ids) + + def Find(self, value, direction, pattern, context, callback): + class Context(): + def __init__(self, *x): + self.value, self.direction, self.pattern, self.query, self.last_value, self.last_pattern = x + def Update(self, *x): + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = x + (self.value, self.pattern) + if len(context): + context[0].Update(value, direction, pattern) + else: + context.append(Context(value, direction, pattern, QSqlQuery(self.glb.db), None, None)) + # Use a thread so the UI is not blocked during the SELECT + thread = Thread(self.FindThread, context[0]) + thread.done.connect(lambda ids, t=thread, c=callback: self.FindDone(t, c, ids), Qt.QueuedConnection) + thread.start() + + def FindDone(self, thread, callback, ids): + callback(ids) + +# Vertical widget layout + +class VBox(): + + def __init__(self, w1, w2, w3=None): + self.vbox = QWidget() + self.vbox.setLayout(QVBoxLayout()); + + self.vbox.layout().setContentsMargins(0, 0, 0, 0) + + self.vbox.layout().addWidget(w1) + self.vbox.layout().addWidget(w2) + if w3: + self.vbox.layout().addWidget(w3) + + def Widget(self): + return self.vbox + +# Context-sensitive call graph window + +class CallGraphWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(CallGraphWindow, self).__init__(parent) + + self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x)) + + self.view = QTreeView() + self.view.setModel(self.model) + + for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): + self.view.setColumnWidth(c, w) + + self.find_bar = FindBar(self, self) + + self.vbox = VBox(self.view, self.find_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Context-Sensitive Call Graph") + + def DisplayFound(self, ids): + if not len(ids): + return False + parent = QModelIndex() + for dbid in ids: + found = False + n = self.model.rowCount(parent) + for row in xrange(n): + child = self.model.index(row, 0, parent) + if child.internalPointer().dbid == dbid: + found = True + self.view.setCurrentIndex(child) + parent = child + break + if not found: + break + return found + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.model.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, ids): + found = True + if not self.DisplayFound(ids): + found = False + self.find_bar.Idle() + if not found: + self.find_bar.NotFound() + +# Child data item finder + +class ChildDataItemFinder(): + + def __init__(self, root): + self.root = root + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (None,) * 5 + self.rows = [] + self.pos = 0 + + def FindSelect(self): + self.rows = [] + if self.pattern: + pattern = re.compile(self.value) + for child in self.root.child_items: + for column_data in child.data: + if re.search(pattern, str(column_data)) is not None: + self.rows.append(child.row) + break + else: + for child in self.root.child_items: + for column_data in child.data: + if self.value in str(column_data): + self.rows.append(child.row) + break + + def FindValue(self): + self.pos = 0 + if self.last_value != self.value or self.pattern != self.last_pattern: + self.FindSelect() + if not len(self.rows): + return -1 + return self.rows[self.pos] + + def FindThread(self): + if self.direction == 0 or self.value != self.last_value or self.pattern != self.last_pattern: + row = self.FindValue() + elif len(self.rows): + if self.direction > 0: + self.pos += 1 + if self.pos >= len(self.rows): + self.pos = 0 + else: + self.pos -= 1 + if self.pos < 0: + self.pos = len(self.rows) - 1 + row = self.rows[self.pos] + else: + row = -1 + return (True, row) + + def Find(self, value, direction, pattern, context, callback): + self.value, self.direction, self.pattern, self.last_value, self.last_pattern = (value, direction,pattern, self.value, self.pattern) + # Use a thread so the UI is not blocked + thread = Thread(self.FindThread) + thread.done.connect(lambda row, t=thread, c=callback: self.FindDone(t, c, row), Qt.QueuedConnection) + thread.start() + + def FindDone(self, thread, callback, row): + callback(row) + +# Number of database records to fetch in one go + +glb_chunk_sz = 10000 + +# size of pickled integer big enough for record size + +glb_nsz = 8 + +# Background process for SQL data fetcher + +class SQLFetcherProcess(): + + def __init__(self, dbref, sql, buffer, head, tail, fetch_count, fetching_done, process_target, wait_event, fetched_event, prep): + # Need a unique connection name + conn_name = "SQLFetcher" + str(os.getpid()) + self.db, dbname = dbref.Open(conn_name) + self.sql = sql + self.buffer = buffer + self.head = head + self.tail = tail + self.fetch_count = fetch_count + self.fetching_done = fetching_done + self.process_target = process_target + self.wait_event = wait_event + self.fetched_event = fetched_event + self.prep = prep + self.query = QSqlQuery(self.db) + self.query_limit = 0 if "$$last_id$$" in sql else 2 + self.last_id = -1 + self.fetched = 0 + self.more = True + self.local_head = self.head.value + self.local_tail = self.tail.value + + def Select(self): + if self.query_limit: + if self.query_limit == 1: + return + self.query_limit -= 1 + stmt = self.sql.replace("$$last_id$$", str(self.last_id)) + QueryExec(self.query, stmt) + + def Next(self): + if not self.query.next(): + self.Select() + if not self.query.next(): + return None + self.last_id = self.query.value(0) + return self.prep(self.query) + + def WaitForTarget(self): + while True: + self.wait_event.clear() + target = self.process_target.value + if target > self.fetched or target < 0: + break + self.wait_event.wait() + return target + + def HasSpace(self, sz): + if self.local_tail <= self.local_head: + space = len(self.buffer) - self.local_head + if space > sz: + return True + if space >= glb_nsz: + # Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer + nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL) + self.buffer[self.local_head : self.local_head + len(nd)] = nd + self.local_head = 0 + if self.local_tail - self.local_head > sz: + return True + return False + + def WaitForSpace(self, sz): + if self.HasSpace(sz): + return + while True: + self.wait_event.clear() + self.local_tail = self.tail.value + if self.HasSpace(sz): + return + self.wait_event.wait() + + def AddToBuffer(self, obj): + d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL) + n = len(d) + nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL) + sz = n + glb_nsz + self.WaitForSpace(sz) + pos = self.local_head + self.buffer[pos : pos + len(nd)] = nd + self.buffer[pos + glb_nsz : pos + sz] = d + self.local_head += sz + + def FetchBatch(self, batch_size): + fetched = 0 + while batch_size > fetched: + obj = self.Next() + if obj is None: + self.more = False + break + self.AddToBuffer(obj) + fetched += 1 + if fetched: + self.fetched += fetched + with self.fetch_count.get_lock(): + self.fetch_count.value += fetched + self.head.value = self.local_head + self.fetched_event.set() + + def Run(self): + while self.more: + target = self.WaitForTarget() + if target < 0: + break + batch_size = min(glb_chunk_sz, target - self.fetched) + self.FetchBatch(batch_size) + self.fetching_done.value = True + self.fetched_event.set() + +def SQLFetcherFn(*x): + process = SQLFetcherProcess(*x) + process.Run() + +# SQL data fetcher + +class SQLFetcher(QObject): + + done = Signal(object) + + def __init__(self, glb, sql, prep, process_data, parent=None): + super(SQLFetcher, self).__init__(parent) + self.process_data = process_data + self.more = True + self.target = 0 + self.last_target = 0 + self.fetched = 0 + self.buffer_size = 16 * 1024 * 1024 + self.buffer = Array(c_char, self.buffer_size, lock=False) + self.head = Value(c_longlong) + self.tail = Value(c_longlong) + self.local_tail = 0 + self.fetch_count = Value(c_longlong) + self.fetching_done = Value(c_bool) + self.last_count = 0 + self.process_target = Value(c_longlong) + self.wait_event = Event() + self.fetched_event = Event() + glb.AddInstanceToShutdownOnExit(self) + self.process = Process(target=SQLFetcherFn, args=(glb.dbref, sql, self.buffer, self.head, self.tail, self.fetch_count, self.fetching_done, self.process_target, self.wait_event, self.fetched_event, prep)) + self.process.start() + self.thread = Thread(self.Thread) + self.thread.done.connect(self.ProcessData, Qt.QueuedConnection) + self.thread.start() + + def Shutdown(self): + # Tell the thread and process to exit + self.process_target.value = -1 + self.wait_event.set() + self.more = False + self.fetching_done.value = True + self.fetched_event.set() + + def Thread(self): + if not self.more: + return True, 0 + while True: + self.fetched_event.clear() + fetch_count = self.fetch_count.value + if fetch_count != self.last_count: + break + if self.fetching_done.value: + self.more = False + return True, 0 + self.fetched_event.wait() + count = fetch_count - self.last_count + self.last_count = fetch_count + self.fetched += count + return False, count + + def Fetch(self, nr): + if not self.more: + # -1 inidcates there are no more + return -1 + result = self.fetched + extra = result + nr - self.target + if extra > 0: + self.target += extra + # process_target < 0 indicates shutting down + if self.process_target.value >= 0: + self.process_target.value = self.target + self.wait_event.set() + return result + + def RemoveFromBuffer(self): + pos = self.local_tail + if len(self.buffer) - pos < glb_nsz: + pos = 0 + n = cPickle.loads(self.buffer[pos : pos + glb_nsz]) + if n == 0: + pos = 0 + n = cPickle.loads(self.buffer[0 : glb_nsz]) + pos += glb_nsz + obj = cPickle.loads(self.buffer[pos : pos + n]) + self.local_tail = pos + n + return obj + + def ProcessData(self, count): + for i in xrange(count): + obj = self.RemoveFromBuffer() + self.process_data(obj) + self.tail.value = self.local_tail + self.wait_event.set() + self.done.emit(count) + +# Fetch more records bar + +class FetchMoreRecordsBar(): + + def __init__(self, model, parent): + self.model = model + + self.label = QLabel("Number of records (x " + "{:,}".format(glb_chunk_sz) + ") to fetch:") + self.label.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.fetch_count = QSpinBox() + self.fetch_count.setRange(1, 1000000) + self.fetch_count.setValue(10) + self.fetch_count.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.fetch = QPushButton("Go!") + self.fetch.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + self.fetch.released.connect(self.FetchMoreRecords) + + self.progress = QProgressBar() + self.progress.setRange(0, 100) + self.progress.hide() + + self.done_label = QLabel("All records fetched") + self.done_label.hide() + + self.spacer = QLabel("") + + self.close_button = QToolButton() + self.close_button.setIcon(parent.style().standardIcon(QStyle.SP_DockWidgetCloseButton)) + self.close_button.released.connect(self.Deactivate) + + self.hbox = QHBoxLayout() + self.hbox.setContentsMargins(0, 0, 0, 0) + + self.hbox.addWidget(self.label) + self.hbox.addWidget(self.fetch_count) + self.hbox.addWidget(self.fetch) + self.hbox.addWidget(self.spacer) + self.hbox.addWidget(self.progress) + self.hbox.addWidget(self.done_label) + self.hbox.addWidget(self.close_button) + + self.bar = QWidget() + self.bar.setLayout(self.hbox); + self.bar.show() + + self.in_progress = False + self.model.progress.connect(self.Progress) + + self.done = False + + if not model.HasMoreRecords(): + self.Done() + + def Widget(self): + return self.bar + + def Activate(self): + self.bar.show() + self.fetch.setFocus() + + def Deactivate(self): + self.bar.hide() + + def Enable(self, enable): + self.fetch.setEnabled(enable) + self.fetch_count.setEnabled(enable) + + def Busy(self): + self.Enable(False) + self.fetch.hide() + self.spacer.hide() + self.progress.show() + + def Idle(self): + self.in_progress = False + self.Enable(True) + self.progress.hide() + self.fetch.show() + self.spacer.show() + + def Target(self): + return self.fetch_count.value() * glb_chunk_sz + + def Done(self): + self.done = True + self.Idle() + self.label.hide() + self.fetch_count.hide() + self.fetch.hide() + self.spacer.hide() + self.done_label.show() + + def Progress(self, count): + if self.in_progress: + if count: + percent = ((count - self.start) * 100) / self.Target() + if percent >= 100: + self.Idle() + else: + self.progress.setValue(percent) + if not count: + # Count value of zero means no more records + self.Done() + + def FetchMoreRecords(self): + if self.done: + return + self.progress.setValue(0) + self.Busy() + self.in_progress = True + self.start = self.model.FetchMoreRecords(self.Target()) + +# Brance data model level two item + +class BranchLevelTwoItem(): + + def __init__(self, row, text, parent_item): + self.row = row + self.parent_item = parent_item + self.data = [""] * 8 + self.data[7] = text + self.level = 2 + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def childCount(self): + return 0 + + def hasChildren(self): + return False + + def getData(self, column): + return self.data[column] + +# Brance data model level one item + +class BranchLevelOneItem(): + + def __init__(self, glb, row, data, parent_item): + self.glb = glb + self.row = row + self.parent_item = parent_item + self.child_count = 0 + self.child_items = [] + self.data = data[1:] + self.dbid = data[0] + self.level = 1 + self.query_done = False + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return self.parent_item + + def getRow(self): + return self.row + + def Select(self): + self.query_done = True + + if not self.glb.have_disassembler: + return + + query = QSqlQuery(self.glb.db) + + QueryExec(query, "SELECT cpu, to_dso_id, to_symbol_id, to_sym_offset, short_name, long_name, build_id, sym_start, to_ip" + " FROM samples" + " INNER JOIN dsos ON samples.to_dso_id = dsos.id" + " INNER JOIN symbols ON samples.to_symbol_id = symbols.id" + " WHERE samples.id = " + str(self.dbid)) + if not query.next(): + return + cpu = query.value(0) + dso = query.value(1) + sym = query.value(2) + if dso == 0 or sym == 0: + return + off = query.value(3) + short_name = query.value(4) + long_name = query.value(5) + build_id = query.value(6) + sym_start = query.value(7) + ip = query.value(8) + + QueryExec(query, "SELECT samples.dso_id, symbol_id, sym_offset, sym_start" + " FROM samples" + " INNER JOIN symbols ON samples.symbol_id = symbols.id" + " WHERE samples.id > " + str(self.dbid) + " AND cpu = " + str(cpu) + + " ORDER BY samples.id" + " LIMIT 1") + if not query.next(): + return + if query.value(0) != dso: + # Cannot disassemble from one dso to another + return + bsym = query.value(1) + boff = query.value(2) + bsym_start = query.value(3) + if bsym == 0: + return + tot = bsym_start + boff + 1 - sym_start - off + if tot <= 0 or tot > 16384: + return + + inst = self.glb.disassembler.Instruction() + f = self.glb.FileFromNamesAndBuildId(short_name, long_name, build_id) + if not f: + return + mode = 0 if Is64Bit(f) else 1 + self.glb.disassembler.SetMode(inst, mode) + + buf_sz = tot + 16 + buf = create_string_buffer(tot + 16) + f.seek(sym_start + off) + buf.value = f.read(buf_sz) + buf_ptr = addressof(buf) + i = 0 + while tot > 0: + cnt, text = self.glb.disassembler.DisassembleOne(inst, buf_ptr, buf_sz, ip) + if cnt: + byte_str = tohex(ip).rjust(16) + for k in xrange(cnt): + byte_str += " %02x" % ord(buf[i]) + i += 1 + while k < 15: + byte_str += " " + k += 1 + self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) + self.child_count += 1 + else: + return + buf_ptr += cnt + tot -= cnt + buf_sz -= cnt + ip += cnt + + def childCount(self): + if not self.query_done: + self.Select() + if not self.child_count: + return -1 + return self.child_count + + def hasChildren(self): + if not self.query_done: + return True + return self.child_count > 0 + + def getData(self, column): + return self.data[column] + +# Brance data model root item + +class BranchRootItem(): + + def __init__(self): + self.child_count = 0 + self.child_items = [] + self.level = 0 + + def getChildItem(self, row): + return self.child_items[row] + + def getParentItem(self): + return None + + def getRow(self): + return 0 + + def childCount(self): + return self.child_count + + def hasChildren(self): + return self.child_count > 0 + + def getData(self, column): + return "" + +# Branch data preparation + +def BranchDataPrep(query): + data = [] + for i in xrange(0, 8): + data.append(query.value(i)) + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + + " (" + dsoname(query.value(11)) + ")" + " -> " + + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + + " (" + dsoname(query.value(15)) + ")") + return data + +# Branch data model + +class BranchModel(TreeModel): + + progress = Signal(object) + + def __init__(self, glb, event_id, where_clause, parent=None): + super(BranchModel, self).__init__(BranchRootItem(), parent) + self.glb = glb + self.event_id = event_id + self.more = True + self.populated = 0 + sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," + " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," + " ip, symbols.name, sym_offset, dsos.short_name," + " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" + " FROM samples" + " INNER JOIN comms ON comm_id = comms.id" + " INNER JOIN threads ON thread_id = threads.id" + " INNER JOIN branch_types ON branch_type = branch_types.id" + " INNER JOIN symbols ON symbol_id = symbols.id" + " INNER JOIN symbols to_symbols ON to_symbol_id = to_symbols.id" + " INNER JOIN dsos ON samples.dso_id = dsos.id" + " INNER JOIN dsos AS to_dsos ON samples.to_dso_id = to_dsos.id" + " WHERE samples.id > $$last_id$$" + where_clause + + " AND evsel_id = " + str(self.event_id) + + " ORDER BY samples.id" + " LIMIT " + str(glb_chunk_sz)) + self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample) + self.fetcher.done.connect(self.Update) + self.fetcher.Fetch(glb_chunk_sz) + + def columnCount(self, parent=None): + return 8 + + def columnHeader(self, column): + return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] + + def columnFont(self, column): + if column != 7: + return None + return QFont("Monospace") + + def DisplayData(self, item, index): + if item.level == 1: + self.FetchIfNeeded(item.row) + return item.getData(index.column()) + + def AddSample(self, data): + child = BranchLevelOneItem(self.glb, self.populated, data, self.root) + self.root.child_items.append(child) + self.populated += 1 + + def Update(self, fetched): + if not fetched: + self.more = False + self.progress.emit(0) + child_count = self.root.child_count + count = self.populated - child_count + if count > 0: + parent = QModelIndex() + self.beginInsertRows(parent, child_count, child_count + count - 1) + self.insertRows(child_count, count, parent) + self.root.child_count += count + self.endInsertRows() + self.progress.emit(self.root.child_count) + + def FetchMoreRecords(self, count): + current = self.root.child_count + if self.more: + self.fetcher.Fetch(count) + else: + self.progress.emit(0) + return current + + def HasMoreRecords(self): + return self.more + +# Branch window + +class BranchWindow(QMdiSubWindow): + + def __init__(self, glb, event_id, name, where_clause, parent=None): + super(BranchWindow, self).__init__(parent) + + model_name = "Branch Events " + str(event_id) + if len(where_clause): + model_name = where_clause + " " + model_name + + self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause)) + + self.view = QTreeView() + self.view.setUniformRowHeights(True) + self.view.setModel(self.model) + + self.ResizeColumnsToContents() + + self.find_bar = FindBar(self, self, True) + + self.finder = ChildDataItemFinder(self.model.root) + + self.fetch_bar = FetchMoreRecordsBar(self.model, self) + + self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events") + + def ResizeColumnToContents(self, column, n): + # Using the view's resizeColumnToContents() here is extrememly slow + # so implement a crude alternative + mm = "MM" if column else "MMMM" + font = self.view.font() + metrics = QFontMetrics(font) + max = 0 + for row in xrange(n): + val = self.model.root.child_items[row].data[column] + len = metrics.width(str(val) + mm) + max = len if len > max else max + val = self.model.columnHeader(column) + len = metrics.width(str(val) + mm) + max = len if len > max else max + self.view.setColumnWidth(column, max) + + def ResizeColumnsToContents(self): + n = min(self.model.root.child_count, 100) + if n < 1: + # No data yet, so connect a signal to notify when there is + self.model.rowsInserted.connect(self.UpdateColumnWidths) + return + columns = self.model.columnCount() + for i in xrange(columns): + self.ResizeColumnToContents(i, n) + + def UpdateColumnWidths(self, *x): + # This only needs to be done once, so disconnect the signal now + self.model.rowsInserted.disconnect(self.UpdateColumnWidths) + self.ResizeColumnsToContents() + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.finder.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, row): + self.find_bar.Idle() + if row >= 0: + self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) + else: + self.find_bar.NotFound() + +# Dialog data item converted and validated using a SQL table + +class SQLTableDialogDataItem(): + + def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): + self.glb = glb + self.label = label + self.placeholder_text = placeholder_text + self.table_name = table_name + self.match_column = match_column + self.column_name1 = column_name1 + self.column_name2 = column_name2 + self.parent = parent + + self.value = "" + + self.widget = QLineEdit() + self.widget.editingFinished.connect(self.Validate) + self.widget.textChanged.connect(self.Invalidate) + self.red = False + self.error = "" + self.validated = True + + self.last_id = 0 + self.first_time = 0 + self.last_time = 2 ** 64 + if self.table_name == "<timeranges>": + query = QSqlQuery(self.glb.db) + QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") + if query.next(): + self.last_id = int(query.value(0)) + self.last_time = int(query.value(1)) + QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") + if query.next(): + self.first_time = int(query.value(0)) + if placeholder_text: + placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) + + if placeholder_text: + self.widget.setPlaceholderText(placeholder_text) + + def ValueToIds(self, value): + ids = [] + query = QSqlQuery(self.glb.db) + stmt = "SELECT id FROM " + self.table_name + " WHERE " + self.match_column + " = '" + value + "'" + ret = query.exec_(stmt) + if ret: + while query.next(): + ids.append(str(query.value(0))) + return ids + + def IdBetween(self, query, lower_id, higher_id, order): + QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") + if query.next(): + return True, int(query.value(0)) + else: + return False, 0 + + def BinarySearchTime(self, lower_id, higher_id, target_time, get_floor): + query = QSqlQuery(self.glb.db) + while True: + next_id = int((lower_id + higher_id) / 2) + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + if not query.next(): + ok, dbid = self.IdBetween(query, lower_id, next_id, "DESC") + if not ok: + ok, dbid = self.IdBetween(query, next_id, higher_id, "") + if not ok: + return str(higher_id) + next_id = dbid + QueryExec(query, "SELECT time FROM samples WHERE id = " + str(next_id)) + next_time = int(query.value(0)) + if get_floor: + if target_time > next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(higher_id) + else: + if target_time >= next_time: + lower_id = next_id + else: + higher_id = next_id + if higher_id <= lower_id + 1: + return str(lower_id) + + def ConvertRelativeTime(self, val): + print "val ", val + mult = 1 + suffix = val[-2:] + if suffix == "ms": + mult = 1000000 + elif suffix == "us": + mult = 1000 + elif suffix == "ns": + mult = 1 + else: + return val + val = val[:-2].strip() + if not self.IsNumber(val): + return val + val = int(val) * mult + if val >= 0: + val += self.first_time + else: + val += self.last_time + return str(val) + + def ConvertTimeRange(self, vrange): + print "vrange ", vrange + if vrange[0] == "": + vrange[0] = str(self.first_time) + if vrange[1] == "": + vrange[1] = str(self.last_time) + vrange[0] = self.ConvertRelativeTime(vrange[0]) + vrange[1] = self.ConvertRelativeTime(vrange[1]) + print "vrange2 ", vrange + if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return False + print "ok1" + beg_range = max(int(vrange[0]), self.first_time) + end_range = min(int(vrange[1]), self.last_time) + if beg_range > self.last_time or end_range < self.first_time: + return False + print "ok2" + vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) + vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) + print "vrange3 ", vrange + return True + + def AddTimeRange(self, value, ranges): + print "value ", value + n = value.count("-") + if n == 1: + pass + elif n == 2: + if value.split("-")[1].strip() == "": + n = 1 + elif n == 3: + n = 2 + else: + return False + pos = findnth(value, "-", n) + vrange = [value[:pos].strip() ,value[pos+1:].strip()] + if self.ConvertTimeRange(vrange): + ranges.append(vrange) + return True + return False + + def InvalidValue(self, value): + self.value = "" + palette = QPalette() + palette.setColor(QPalette.Text,Qt.red) + self.widget.setPalette(palette) + self.red = True + self.error = self.label + " invalid value '" + value + "'" + self.parent.ShowMessage(self.error) + + def IsNumber(self, value): + try: + x = int(value) + except: + x = 0 + return str(x) == value + + def Invalidate(self): + self.validated = False + + def Validate(self): + input_string = self.widget.text() + self.validated = True + if self.red: + palette = QPalette() + self.widget.setPalette(palette) + self.red = False + if not len(input_string.strip()): + self.error = "" + self.value = "" + return + if self.table_name == "<timeranges>": + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if not self.AddTimeRange(value, ranges): + return self.InvalidValue(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + self.value = " OR ".join(ranges) + elif self.table_name == "<ranges>": + singles = [] + ranges = [] + for value in [x.strip() for x in input_string.split(",")]: + if "-" in value: + vrange = value.split("-") + if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): + return self.InvalidValue(value) + ranges.append(vrange) + else: + if not self.IsNumber(value): + return self.InvalidValue(value) + singles.append(value) + ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] + if len(singles): + ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")") + self.value = " OR ".join(ranges) + elif self.table_name: + all_ids = [] + for value in [x.strip() for x in input_string.split(",")]: + ids = self.ValueToIds(value) + if len(ids): + all_ids.extend(ids) + else: + return self.InvalidValue(value) + self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")" + if self.column_name2: + self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )" + else: + self.value = input_string.strip() + self.error = "" + self.parent.ClearMessage() + + def IsValid(self): + if not self.validated: + self.Validate() + if len(self.error): + self.parent.ShowMessage(self.error) + return False + return True + +# Selected branch report creation dialog + +class SelectedBranchDialog(QDialog): + + def __init__(self, glb, parent=None): + super(SelectedBranchDialog, self).__init__(parent) + + self.glb = glb + + self.name = "" + self.where_clause = "" + + self.setWindowTitle("Selected Branches") + self.setMinimumWidth(600) + + items = ( + ("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""), + ("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""), + ("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""), + ("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""), + ("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""), + ("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""), + ("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"), + ("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"), + ("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""), + ) + self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items] + + self.grid = QGridLayout() + + for row in xrange(len(self.data_items)): + self.grid.addWidget(QLabel(self.data_items[row].label), row, 0) + self.grid.addWidget(self.data_items[row].widget, row, 1) + + self.status = QLabel() + + self.ok_button = QPushButton("Ok", self) + self.ok_button.setDefault(True) + self.ok_button.released.connect(self.Ok) + self.ok_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.cancel_button = QPushButton("Cancel", self) + self.cancel_button.released.connect(self.reject) + self.cancel_button.setSizePolicy(QSizePolicy.Fixed, QSizePolicy.Fixed) + + self.hbox = QHBoxLayout() + #self.hbox.addStretch() + self.hbox.addWidget(self.status) + self.hbox.addWidget(self.ok_button) + self.hbox.addWidget(self.cancel_button) + + self.vbox = QVBoxLayout() + self.vbox.addLayout(self.grid) + self.vbox.addLayout(self.hbox) + + self.setLayout(self.vbox); + + def Ok(self): + self.name = self.data_items[0].value + if not self.name: + self.ShowMessage("Report name is required") + return + for d in self.data_items: + if not d.IsValid(): + return + for d in self.data_items[1:]: + if len(d.value): + if len(self.where_clause): + self.where_clause += " AND " + self.where_clause += d.value + if len(self.where_clause): + self.where_clause = " AND ( " + self.where_clause + " ) " + else: + self.ShowMessage("No selection") + return + self.accept() + + def ShowMessage(self, msg): + self.status.setText("<font color=#FF0000>" + msg) + + def ClearMessage(self): + self.status.setText("") + +# Event list + +def GetEventList(db): + events = [] + query = QSqlQuery(db) + QueryExec(query, "SELECT name FROM selected_events WHERE id > 0 ORDER BY id") + while query.next(): + events.append(query.value(0)) + return events + +# SQL data preparation + +def SQLTableDataPrep(query, count): + data = [] + for i in xrange(count): + data.append(query.value(i)) + return data + +# SQL table data model item + +class SQLTableItem(): + + def __init__(self, row, data): + self.row = row + self.data = data + + def getData(self, column): + return self.data[column] + +# SQL table data model + +class SQLTableModel(TableModel): + + progress = Signal(object) + + def __init__(self, glb, sql, column_count, parent=None): + super(SQLTableModel, self).__init__(parent) + self.glb = glb + self.more = True + self.populated = 0 + self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample) + self.fetcher.done.connect(self.Update) + self.fetcher.Fetch(glb_chunk_sz) + + def DisplayData(self, item, index): + self.FetchIfNeeded(item.row) + return item.getData(index.column()) + + def AddSample(self, data): + child = SQLTableItem(self.populated, data) + self.child_items.append(child) + self.populated += 1 + + def Update(self, fetched): + if not fetched: + self.more = False + self.progress.emit(0) + child_count = self.child_count + count = self.populated - child_count + if count > 0: + parent = QModelIndex() + self.beginInsertRows(parent, child_count, child_count + count - 1) + self.insertRows(child_count, count, parent) + self.child_count += count + self.endInsertRows() + self.progress.emit(self.child_count) + + def FetchMoreRecords(self, count): + current = self.child_count + if self.more: + self.fetcher.Fetch(count) + else: + self.progress.emit(0) + return current + + def HasMoreRecords(self): + return self.more + +# SQL automatic table data model + +class SQLAutoTableModel(SQLTableModel): + + def __init__(self, glb, table_name, parent=None): + sql = "SELECT * FROM " + table_name + " WHERE id > $$last_id$$ ORDER BY id LIMIT " + str(glb_chunk_sz) + if table_name == "comm_threads_view": + # For now, comm_threads_view has no id column + sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz) + self.column_headers = [] + query = QSqlQuery(glb.db) + if glb.dbref.is_sqlite3: + QueryExec(query, "PRAGMA table_info(" + table_name + ")") + while query.next(): + self.column_headers.append(query.value(1)) + if table_name == "sqlite_master": + sql = "SELECT * FROM " + table_name + else: + if table_name[:19] == "information_schema.": + sql = "SELECT * FROM " + table_name + select_table_name = table_name[19:] + schema = "information_schema" + else: + select_table_name = table_name + schema = "public" + QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") + while query.next(): + self.column_headers.append(query.value(0)) + super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent) + + def columnCount(self, parent=None): + return len(self.column_headers) + + def columnHeader(self, column): + return self.column_headers[column] + +# Base class for custom ResizeColumnsToContents + +class ResizeColumnsToContentsBase(QObject): + + def __init__(self, parent=None): + super(ResizeColumnsToContentsBase, self).__init__(parent) + + def ResizeColumnToContents(self, column, n): + # Using the view's resizeColumnToContents() here is extrememly slow + # so implement a crude alternative + font = self.view.font() + metrics = QFontMetrics(font) + max = 0 + for row in xrange(n): + val = self.data_model.child_items[row].data[column] + len = metrics.width(str(val) + "MM") + max = len if len > max else max + val = self.data_model.columnHeader(column) + len = metrics.width(str(val) + "MM") + max = len if len > max else max + self.view.setColumnWidth(column, max) + + def ResizeColumnsToContents(self): + n = min(self.data_model.child_count, 100) + if n < 1: + # No data yet, so connect a signal to notify when there is + self.data_model.rowsInserted.connect(self.UpdateColumnWidths) + return + columns = self.data_model.columnCount() + for i in xrange(columns): + self.ResizeColumnToContents(i, n) + + def UpdateColumnWidths(self, *x): + # This only needs to be done once, so disconnect the signal now + self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths) + self.ResizeColumnsToContents() + +# Table window + +class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): + + def __init__(self, glb, table_name, parent=None): + super(TableWindow, self).__init__(parent) + + self.data_model = LookupCreateModel(table_name + " Table", lambda: SQLAutoTableModel(glb, table_name)) + + self.model = QSortFilterProxyModel() + self.model.setSourceModel(self.data_model) + + self.view = QTableView() + self.view.setModel(self.model) + self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) + self.view.verticalHeader().setVisible(False) + self.view.sortByColumn(-1, Qt.AscendingOrder) + self.view.setSortingEnabled(True) + + self.ResizeColumnsToContents() + + self.find_bar = FindBar(self, self, True) + + self.finder = ChildDataItemFinder(self.data_model) + + self.fetch_bar = FetchMoreRecordsBar(self.data_model, self) + + self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) + + self.setWidget(self.vbox.Widget()) + + AddSubWindow(glb.mainwindow.mdi_area, self, table_name + " Table") + + def Find(self, value, direction, pattern, context): + self.view.setFocus() + self.find_bar.Busy() + self.finder.Find(value, direction, pattern, context, self.FindDone) + + def FindDone(self, row): + self.find_bar.Idle() + if row >= 0: + self.view.setCurrentIndex(self.model.mapFromSource(self.data_model.index(row, 0, QModelIndex()))) + else: + self.find_bar.NotFound() + +# Table list + +def GetTableList(glb): + tables = [] + query = QSqlQuery(glb.db) + if glb.dbref.is_sqlite3: + QueryExec(query, "SELECT name FROM sqlite_master WHERE type IN ( 'table' , 'view' ) ORDER BY name") + else: + QueryExec(query, "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_type IN ( 'BASE TABLE' , 'VIEW' ) ORDER BY table_name") + while query.next(): + tables.append(query.value(0)) + if glb.dbref.is_sqlite3: + tables.append("sqlite_master") + else: + tables.append("information_schema.tables") + tables.append("information_schema.views") + tables.append("information_schema.columns") + return tables + +# Action Definition + +def CreateAction(label, tip, callback, parent=None, shortcut=None): + action = QAction(label, parent) + if shortcut != None: + action.setShortcuts(shortcut) + action.setStatusTip(tip) + action.triggered.connect(callback) + return action + +# Typical application actions + +def CreateExitAction(app, parent=None): + return CreateAction("&Quit", "Exit the application", app.closeAllWindows, parent, QKeySequence.Quit) + +# Typical MDI actions + +def CreateCloseActiveWindowAction(mdi_area): + return CreateAction("Cl&ose", "Close the active window", mdi_area.closeActiveSubWindow, mdi_area) + +def CreateCloseAllWindowsAction(mdi_area): + return CreateAction("Close &All", "Close all the windows", mdi_area.closeAllSubWindows, mdi_area) + +def CreateTileWindowsAction(mdi_area): + return CreateAction("&Tile", "Tile the windows", mdi_area.tileSubWindows, mdi_area) + +def CreateCascadeWindowsAction(mdi_area): + return CreateAction("&Cascade", "Cascade the windows", mdi_area.cascadeSubWindows, mdi_area) + +def CreateNextWindowAction(mdi_area): + return CreateAction("Ne&xt", "Move the focus to the next window", mdi_area.activateNextSubWindow, mdi_area, QKeySequence.NextChild) + +def CreatePreviousWindowAction(mdi_area): + return CreateAction("Pre&vious", "Move the focus to the previous window", mdi_area.activatePreviousSubWindow, mdi_area, QKeySequence.PreviousChild) + +# Typical MDI window menu + +class WindowMenu(): + + def __init__(self, mdi_area, menu): + self.mdi_area = mdi_area + self.window_menu = menu.addMenu("&Windows") + self.close_active_window = CreateCloseActiveWindowAction(mdi_area) + self.close_all_windows = CreateCloseAllWindowsAction(mdi_area) + self.tile_windows = CreateTileWindowsAction(mdi_area) + self.cascade_windows = CreateCascadeWindowsAction(mdi_area) + self.next_window = CreateNextWindowAction(mdi_area) + self.previous_window = CreatePreviousWindowAction(mdi_area) + self.window_menu.aboutToShow.connect(self.Update) + + def Update(self): + self.window_menu.clear() + sub_window_count = len(self.mdi_area.subWindowList()) + have_sub_windows = sub_window_count != 0 + self.close_active_window.setEnabled(have_sub_windows) + self.close_all_windows.setEnabled(have_sub_windows) + self.tile_windows.setEnabled(have_sub_windows) + self.cascade_windows.setEnabled(have_sub_windows) + self.next_window.setEnabled(have_sub_windows) + self.previous_window.setEnabled(have_sub_windows) + self.window_menu.addAction(self.close_active_window) + self.window_menu.addAction(self.close_all_windows) + self.window_menu.addSeparator() + self.window_menu.addAction(self.tile_windows) + self.window_menu.addAction(self.cascade_windows) + self.window_menu.addSeparator() + self.window_menu.addAction(self.next_window) + self.window_menu.addAction(self.previous_window) + if sub_window_count == 0: + return + self.window_menu.addSeparator() + nr = 1 + for sub_window in self.mdi_area.subWindowList(): + label = str(nr) + " " + sub_window.name + if nr < 10: + label = "&" + label + action = self.window_menu.addAction(label) + action.setCheckable(True) + action.setChecked(sub_window == self.mdi_area.activeSubWindow()) + action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) + self.window_menu.addAction(action) + nr += 1 + + def setActiveSubWindow(self, nr): + self.mdi_area.setActiveSubWindow(self.mdi_area.subWindowList()[nr - 1]) + +# Help text + +glb_help_text = """ +<h1>Contents</h1> +<style> +p.c1 { + text-indent: 40px; +} +p.c2 { + text-indent: 80px; +} +} +</style> +<p class=c1><a href=#reports>1. Reports</a></p> +<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p> +<p class=c2><a href=#allbranches>1.2 All branches</a></p> +<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p> +<p class=c1><a href=#tables>2. Tables</a></p> +<h1 id=reports>1. Reports</h1> +<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> +The result is a GUI window with a tree representing a context-sensitive +call-graph. Expanding a couple of levels of the tree and adjusting column +widths to suit will display something like: +<pre> + Call Graph: pt_example +Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) +v- ls + v- 2638:2638 + v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 + |- unknown unknown 1 13198 0.1 1 0.0 + >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 + >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 + v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 + >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 + >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 + >- __libc_csu_init ls 1 10354 0.1 10 0.0 + |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 + v- main ls 1 8182043 99.6 180254 99.9 +</pre> +<h3>Points to note:</h3> +<ul> +<li>The top level is a command name (comm)</li> +<li>The next level is a thread (pid:tid)</li> +<li>Subsequent levels are functions</li> +<li>'Count' is the number of calls</li> +<li>'Time' is the elapsed time until the function returns</li> +<li>Percentages are relative to the level above</li> +<li>'Branch Count' is the total number of branches for that function and all functions that it calls +</ul> +<h3>Find</h3> +Ctrl-F displays a Find bar which finds function names by either an exact match or a pattern match. +The pattern matching symbols are ? for any character and * for zero or more characters. +<h2 id=allbranches>1.2 All branches</h2> +The All branches report displays all branches in chronological order. +Not all data is fetched immediately. More records can be fetched using the Fetch bar provided. +<h3>Disassembly</h3> +Open a branch to display disassembly. This only works if: +<ol> +<li>The disassembler is available. Currently, only Intel XED is supported - see <a href=#xed>Intel XED Setup</a></li> +<li>The object code is available. Currently, only the perf build ID cache is searched for object code. +The default directory ~/.debug can be overridden by setting environment variable PERF_BUILDID_DIR. +One exception is kcore where the DSO long name is used (refer dsos_view on the Tables menu), +or alternatively, set environment variable PERF_KCORE to the kcore file name.</li> +</ol> +<h4 id=xed>Intel XED Setup</h4> +To use Intel XED, libxed.so must be present. To build and install libxed.so: +<pre> +git clone https://github.com/intelxed/mbuild.git mbuild +git clone https://github.com/intelxed/xed +cd xed +./mfile.py --share +sudo ./mfile.py --prefix=/usr/local install +sudo ldconfig +</pre> +<h3>Find</h3> +Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +<h2 id=selectedbranches>1.3 Selected branches</h2> +This is the same as the <a href=#allbranches>All branches</a> report but with the data reduced +by various selection criteria. A dialog box displays available criteria which are AND'ed together. +<h3>1.3.1 Time ranges</h3> +The time ranges hint text shows the total time range. Relative time ranges can also be entered in +ms, us or ns. Also, negative values are relative to the end of trace. Examples: +<pre> + 81073085947329-81073085958238 From 81073085947329 to 81073085958238 + 100us-200us From 100us to 200us + 10ms- From 10ms to the end + -100ns The first 100ns + -10ms- The last 10ms +</pre> +N.B. Due to the granularity of timestamps, there could be no branches in any given time range. +<h1 id=tables>2. Tables</h1> +The Tables menu shows all tables and views in the database. Most tables have an associated view +which displays the information in a more friendly way. Not all data for large tables is fetched +immediately. More records can be fetched using the Fetch bar provided. Columns can be sorted, +but that can be slow for large tables. +<p>There are also tables of database meta-information. +For SQLite3 databases, the sqlite_master table is included. +For PostgreSQL databases, information_schema.tables/views/columns are included. +<h3>Find</h3> +Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. +Refer to Python documentation for the regular expression syntax. +All columns are searched, but only currently fetched rows are searched. +<p>N.B. Results are found in id order, so if the table is re-ordered, find-next and find-previous +will go to the next/previous result in id order, instead of display order. +""" + +# Help window + +class HelpWindow(QMdiSubWindow): + + def __init__(self, glb, parent=None): + super(HelpWindow, self).__init__(parent) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setWidget(self.text) + + AddSubWindow(glb.mainwindow.mdi_area, self, "Exported SQL Viewer Help") + +# Main window that only displays the help text + +class HelpOnlyWindow(QMainWindow): + + def __init__(self, parent=None): + super(HelpOnlyWindow, self).__init__(parent) + + self.setMinimumSize(200, 100) + self.resize(800, 600) + self.setWindowTitle("Exported SQL Viewer Help") + self.setWindowIcon(self.style().standardIcon(QStyle.SP_MessageBoxInformation)) + + self.text = QTextBrowser() + self.text.setHtml(glb_help_text) + self.text.setReadOnly(True) + self.text.setOpenExternalLinks(True) + + self.setCentralWidget(self.text) + +# Font resize + +def ResizeFont(widget, diff): + font = widget.font() + sz = font.pointSize() + font.setPointSize(sz + diff) + widget.setFont(font) + +def ShrinkFont(widget): + ResizeFont(widget, -1) + +def EnlargeFont(widget): + ResizeFont(widget, 1) + +# Unique name for sub-windows + +def NumberedWindowName(name, nr): + if nr > 1: + name += " <" + str(nr) + ">" + return name + +def UniqueSubWindowName(mdi_area, name): + nr = 1 + while True: + unique_name = NumberedWindowName(name, nr) + ok = True + for sub_window in mdi_area.subWindowList(): + if sub_window.name == unique_name: + ok = False + break + if ok: + return unique_name + nr += 1 + +# Add a sub-window + +def AddSubWindow(mdi_area, sub_window, name): + unique_name = UniqueSubWindowName(mdi_area, name) + sub_window.setMinimumSize(200, 100) + sub_window.resize(800, 600) + sub_window.setWindowTitle(unique_name) + sub_window.setAttribute(Qt.WA_DeleteOnClose) + sub_window.setWindowIcon(sub_window.style().standardIcon(QStyle.SP_FileIcon)) + sub_window.name = unique_name + mdi_area.addSubWindow(sub_window) + sub_window.show() + +# Main window + +class MainWindow(QMainWindow): + + def __init__(self, glb, parent=None): + super(MainWindow, self).__init__(parent) + + self.glb = glb + + self.setWindowTitle("Exported SQL Viewer: " + glb.dbname) + self.setWindowIcon(self.style().standardIcon(QStyle.SP_ComputerIcon)) + self.setMinimumSize(200, 100) + + self.mdi_area = QMdiArea() + self.mdi_area.setHorizontalScrollBarPolicy(Qt.ScrollBarAsNeeded) + self.mdi_area.setVerticalScrollBarPolicy(Qt.ScrollBarAsNeeded) + + self.setCentralWidget(self.mdi_area) + + menu = self.menuBar() + + file_menu = menu.addMenu("&File") + file_menu.addAction(CreateExitAction(glb.app, self)) + + edit_menu = menu.addMenu("&Edit") + edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) + edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)])) + edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")])) + edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) + + reports_menu = menu.addMenu("&Reports") + reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) + + self.EventMenu(GetEventList(glb.db), reports_menu) + + self.TableMenu(GetTableList(glb), menu) + + self.window_menu = WindowMenu(self.mdi_area, menu) + + help_menu = menu.addMenu("&Help") + help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents)) + + def Find(self): + win = self.mdi_area.activeSubWindow() + if win: + try: + win.find_bar.Activate() + except: + pass + + def FetchMoreRecords(self): + win = self.mdi_area.activeSubWindow() + if win: + try: + win.fetch_bar.Activate() + except: + pass + + def ShrinkFont(self): + win = self.mdi_area.activeSubWindow() + ShrinkFont(win.view) + + def EnlargeFont(self): + win = self.mdi_area.activeSubWindow() + EnlargeFont(win.view) + + def EventMenu(self, events, reports_menu): + branches_events = 0 + for event in events: + event = event.split(":")[0] + if event == "branches": + branches_events += 1 + dbid = 0 + for event in events: + dbid += 1 + event = event.split(":")[0] + if event == "branches": + label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) + label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" + reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) + + def TableMenu(self, tables, menu): + table_menu = menu.addMenu("&Tables") + for table in tables: + table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) + + def NewCallGraph(self): + CallGraphWindow(self.glb, self) + + def NewBranchView(self, event_id): + BranchWindow(self.glb, event_id, "", "", self) + + def NewSelectedBranchView(self, event_id): + dialog = SelectedBranchDialog(self.glb, self) + ret = dialog.exec_() + if ret: + BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self) + + def NewTableView(self, table_name): + TableWindow(self.glb, table_name, self) + + def Help(self): + HelpWindow(self.glb, self) + +# XED Disassembler + +class xed_state_t(Structure): + + _fields_ = [ + ("mode", c_int), + ("width", c_int) + ] + +class XEDInstruction(): + + def __init__(self, libxed): + # Current xed_decoded_inst_t structure is 192 bytes. Use 512 to allow for future expansion + xedd_t = c_byte * 512 + self.xedd = xedd_t() + self.xedp = addressof(self.xedd) + libxed.xed_decoded_inst_zero(self.xedp) + self.state = xed_state_t() + self.statep = addressof(self.state) + # Buffer for disassembled instruction text + self.buffer = create_string_buffer(256) + self.bufferp = addressof(self.buffer) + +class LibXED(): + + def __init__(self): + try: + self.libxed = CDLL("libxed.so") + except: + self.libxed = None + if not self.libxed: + self.libxed = CDLL("/usr/local/lib/libxed.so") + + self.xed_tables_init = self.libxed.xed_tables_init + self.xed_tables_init.restype = None + self.xed_tables_init.argtypes = [] + + self.xed_decoded_inst_zero = self.libxed.xed_decoded_inst_zero + self.xed_decoded_inst_zero.restype = None + self.xed_decoded_inst_zero.argtypes = [ c_void_p ] + + self.xed_operand_values_set_mode = self.libxed.xed_operand_values_set_mode + self.xed_operand_values_set_mode.restype = None + self.xed_operand_values_set_mode.argtypes = [ c_void_p, c_void_p ] + + self.xed_decoded_inst_zero_keep_mode = self.libxed.xed_decoded_inst_zero_keep_mode + self.xed_decoded_inst_zero_keep_mode.restype = None + self.xed_decoded_inst_zero_keep_mode.argtypes = [ c_void_p ] + + self.xed_decode = self.libxed.xed_decode + self.xed_decode.restype = c_int + self.xed_decode.argtypes = [ c_void_p, c_void_p, c_uint ] + + self.xed_format_context = self.libxed.xed_format_context + self.xed_format_context.restype = c_uint + self.xed_format_context.argtypes = [ c_int, c_void_p, c_void_p, c_int, c_ulonglong, c_void_p, c_void_p ] + + self.xed_tables_init() + + def Instruction(self): + return XEDInstruction(self) + + def SetMode(self, inst, mode): + if mode: + inst.state.mode = 4 # 32-bit + inst.state.width = 4 # 4 bytes + else: + inst.state.mode = 1 # 64-bit + inst.state.width = 8 # 8 bytes + self.xed_operand_values_set_mode(inst.xedp, inst.statep) + + def DisassembleOne(self, inst, bytes_ptr, bytes_cnt, ip): + self.xed_decoded_inst_zero_keep_mode(inst.xedp) + err = self.xed_decode(inst.xedp, bytes_ptr, bytes_cnt) + if err: + return 0, "" + # Use AT&T mode (2), alternative is Intel (3) + ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0) + if not ok: + return 0, "" + # Return instruction length and the disassembled instruction text + # For now, assume the length is in byte 166 + return inst.xedd[166], inst.buffer.value + +def TryOpen(file_name): + try: + return open(file_name, "rb") + except: + return None + +def Is64Bit(f): + result = sizeof(c_void_p) + # ELF support only + pos = f.tell() + f.seek(0) + header = f.read(7) + f.seek(pos) + magic = header[0:4] + eclass = ord(header[4]) + encoding = ord(header[5]) + version = ord(header[6]) + if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1: + result = True if eclass == 2 else False + return result + +# Global data + +class Glb(): + + def __init__(self, dbref, db, dbname): + self.dbref = dbref + self.db = db + self.dbname = dbname + self.home_dir = os.path.expanduser("~") + self.buildid_dir = os.getenv("PERF_BUILDID_DIR") + if self.buildid_dir: + self.buildid_dir += "/.build-id/" + else: + self.buildid_dir = self.home_dir + "/.debug/.build-id/" + self.app = None + self.mainwindow = None + self.instances_to_shutdown_on_exit = weakref.WeakSet() + try: + self.disassembler = LibXED() + self.have_disassembler = True + except: + self.have_disassembler = False + + def FileFromBuildId(self, build_id): + file_name = self.buildid_dir + build_id[0:2] + "/" + build_id[2:] + "/elf" + return TryOpen(file_name) + + def FileFromNamesAndBuildId(self, short_name, long_name, build_id): + # Assume current machine i.e. no support for virtualization + if short_name[0:7] == "[kernel" and os.path.basename(long_name) == "kcore": + file_name = os.getenv("PERF_KCORE") + f = TryOpen(file_name) if file_name else None + if f: + return f + # For now, no special handling if long_name is /proc/kcore + f = TryOpen(long_name) + if f: + return f + f = self.FileFromBuildId(build_id) + if f: + return f + return None + + def AddInstanceToShutdownOnExit(self, instance): + self.instances_to_shutdown_on_exit.add(instance) + + # Shutdown any background processes or threads + def ShutdownInstances(self): + for x in self.instances_to_shutdown_on_exit: + try: + x.Shutdown() + except: + pass + +# Database reference + +class DBRef(): + + def __init__(self, is_sqlite3, dbname): + self.is_sqlite3 = is_sqlite3 + self.dbname = dbname + + def Open(self, connection_name): + dbname = self.dbname + if self.is_sqlite3: + db = QSqlDatabase.addDatabase("QSQLITE", connection_name) + else: + db = QSqlDatabase.addDatabase("QPSQL", connection_name) + opts = dbname.split() + for opt in opts: + if "=" in opt: + opt = opt.split("=") + if opt[0] == "hostname": + db.setHostName(opt[1]) + elif opt[0] == "port": + db.setPort(int(opt[1])) + elif opt[0] == "username": + db.setUserName(opt[1]) + elif opt[0] == "password": + db.setPassword(opt[1]) + elif opt[0] == "dbname": + dbname = opt[1] + else: + dbname = opt + + db.setDatabaseName(dbname) + if not db.open(): + raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) + return db, dbname + +# Main + +def Main(): + if (len(sys.argv) < 2): + print >> sys.stderr, "Usage is: exported-sql-viewer.py {<database name> | --help-only}" + raise Exception("Too few arguments") + + dbname = sys.argv[1] + if dbname == "--help-only": + app = QApplication(sys.argv) + mainwindow = HelpOnlyWindow() + mainwindow.show() + err = app.exec_() + sys.exit(err) + + is_sqlite3 = False + try: + f = open(dbname) + if f.read(15) == "SQLite format 3": + is_sqlite3 = True + f.close() + except: + pass + + dbref = DBRef(is_sqlite3, dbname) + db, dbname = dbref.Open("main") + glb = Glb(dbref, db, dbname) + app = QApplication(sys.argv) + glb.app = app + mainwindow = MainWindow(glb) + glb.mainwindow = mainwindow + mainwindow.show() + err = app.exec_() + glb.ShutdownInstances() + db.close() + sys.exit(err) + +if __name__ == "__main__": + Main() diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py new file mode 100644 index 000000000000..00e0e7476e55 --- /dev/null +++ b/tools/perf/scripts/python/powerpc-hcalls.py @@ -0,0 +1,200 @@ +# SPDX-License-Identifier: GPL-2.0+ +# +# Copyright (C) 2018 Ravi Bangoria, IBM Corporation +# +# Hypervisor call statisics + +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +# output: { +# opcode: { +# 'min': minimum time nsec +# 'max': maximum time nsec +# 'time': average time nsec +# 'cnt': counter +# } ... +# } +output = {} + +# d_enter: { +# cpu: { +# opcode: nsec +# } ... +# } +d_enter = {} + +hcall_table = { + 4: 'H_REMOVE', + 8: 'H_ENTER', + 12: 'H_READ', + 16: 'H_CLEAR_MOD', + 20: 'H_CLEAR_REF', + 24: 'H_PROTECT', + 28: 'H_GET_TCE', + 32: 'H_PUT_TCE', + 36: 'H_SET_SPRG0', + 40: 'H_SET_DABR', + 44: 'H_PAGE_INIT', + 48: 'H_SET_ASR', + 52: 'H_ASR_ON', + 56: 'H_ASR_OFF', + 60: 'H_LOGICAL_CI_LOAD', + 64: 'H_LOGICAL_CI_STORE', + 68: 'H_LOGICAL_CACHE_LOAD', + 72: 'H_LOGICAL_CACHE_STORE', + 76: 'H_LOGICAL_ICBI', + 80: 'H_LOGICAL_DCBF', + 84: 'H_GET_TERM_CHAR', + 88: 'H_PUT_TERM_CHAR', + 92: 'H_REAL_TO_LOGICAL', + 96: 'H_HYPERVISOR_DATA', + 100: 'H_EOI', + 104: 'H_CPPR', + 108: 'H_IPI', + 112: 'H_IPOLL', + 116: 'H_XIRR', + 120: 'H_MIGRATE_DMA', + 124: 'H_PERFMON', + 220: 'H_REGISTER_VPA', + 224: 'H_CEDE', + 228: 'H_CONFER', + 232: 'H_PROD', + 236: 'H_GET_PPP', + 240: 'H_SET_PPP', + 244: 'H_PURR', + 248: 'H_PIC', + 252: 'H_REG_CRQ', + 256: 'H_FREE_CRQ', + 260: 'H_VIO_SIGNAL', + 264: 'H_SEND_CRQ', + 272: 'H_COPY_RDMA', + 276: 'H_REGISTER_LOGICAL_LAN', + 280: 'H_FREE_LOGICAL_LAN', + 284: 'H_ADD_LOGICAL_LAN_BUFFER', + 288: 'H_SEND_LOGICAL_LAN', + 292: 'H_BULK_REMOVE', + 304: 'H_MULTICAST_CTRL', + 308: 'H_SET_XDABR', + 312: 'H_STUFF_TCE', + 316: 'H_PUT_TCE_INDIRECT', + 332: 'H_CHANGE_LOGICAL_LAN_MAC', + 336: 'H_VTERM_PARTNER_INFO', + 340: 'H_REGISTER_VTERM', + 344: 'H_FREE_VTERM', + 348: 'H_RESET_EVENTS', + 352: 'H_ALLOC_RESOURCE', + 356: 'H_FREE_RESOURCE', + 360: 'H_MODIFY_QP', + 364: 'H_QUERY_QP', + 368: 'H_REREGISTER_PMR', + 372: 'H_REGISTER_SMR', + 376: 'H_QUERY_MR', + 380: 'H_QUERY_MW', + 384: 'H_QUERY_HCA', + 388: 'H_QUERY_PORT', + 392: 'H_MODIFY_PORT', + 396: 'H_DEFINE_AQP1', + 400: 'H_GET_TRACE_BUFFER', + 404: 'H_DEFINE_AQP0', + 408: 'H_RESIZE_MR', + 412: 'H_ATTACH_MCQP', + 416: 'H_DETACH_MCQP', + 420: 'H_CREATE_RPT', + 424: 'H_REMOVE_RPT', + 428: 'H_REGISTER_RPAGES', + 432: 'H_DISABLE_AND_GETC', + 436: 'H_ERROR_DATA', + 440: 'H_GET_HCA_INFO', + 444: 'H_GET_PERF_COUNT', + 448: 'H_MANAGE_TRACE', + 468: 'H_FREE_LOGICAL_LAN_BUFFER', + 472: 'H_POLL_PENDING', + 484: 'H_QUERY_INT_STATE', + 580: 'H_ILLAN_ATTRIBUTES', + 592: 'H_MODIFY_HEA_QP', + 596: 'H_QUERY_HEA_QP', + 600: 'H_QUERY_HEA', + 604: 'H_QUERY_HEA_PORT', + 608: 'H_MODIFY_HEA_PORT', + 612: 'H_REG_BCMC', + 616: 'H_DEREG_BCMC', + 620: 'H_REGISTER_HEA_RPAGES', + 624: 'H_DISABLE_AND_GET_HEA', + 628: 'H_GET_HEA_INFO', + 632: 'H_ALLOC_HEA_RESOURCE', + 644: 'H_ADD_CONN', + 648: 'H_DEL_CONN', + 664: 'H_JOIN', + 676: 'H_VASI_STATE', + 688: 'H_ENABLE_CRQ', + 696: 'H_GET_EM_PARMS', + 720: 'H_SET_MPP', + 724: 'H_GET_MPP', + 748: 'H_HOME_NODE_ASSOCIATIVITY', + 756: 'H_BEST_ENERGY', + 764: 'H_XIRR_X', + 768: 'H_RANDOM', + 772: 'H_COP', + 788: 'H_GET_MPP_X', + 796: 'H_SET_MODE', + 61440: 'H_RTAS', +} + +def hcall_table_lookup(opcode): + if (hcall_table.has_key(opcode)): + return hcall_table[opcode] + else: + return opcode + +print_ptrn = '%-28s%10s%10s%10s%10s' + +def trace_end(): + print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)') + print '-' * 68 + for opcode in output: + h_name = hcall_table_lookup(opcode) + time = output[opcode]['time'] + cnt = output[opcode]['cnt'] + min_t = output[opcode]['min'] + max_t = output[opcode]['max'] + + print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt) + +def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain, + opcode, retval): + if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)): + diff = nsecs(sec, nsec) - d_enter[cpu][opcode] + + if (output.has_key(opcode)): + output[opcode]['time'] += diff + output[opcode]['cnt'] += 1 + if (output[opcode]['min'] > diff): + output[opcode]['min'] = diff + if (output[opcode]['max'] < diff): + output[opcode]['max'] = diff + else: + output[opcode] = { + 'time': diff, + 'cnt': 1, + 'min': diff, + 'max': diff, + } + + del d_enter[cpu][opcode] +# else: +# print "Can't find matching hcall_enter event. Ignoring sample" + +def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm, + callchain, opcode): + if (d_enter.has_key(cpu)): + d_enter[cpu][opcode] = nsecs(sec, nsec) + else: + d_enter[cpu] = {opcode: nsecs(sec, nsec)} diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py index de66cb3b72c9..3473e7f66081 100644 --- a/tools/perf/scripts/python/sched-migration.py +++ b/tools/perf/scripts/python/sched-migration.py @@ -9,13 +9,17 @@ # This software is distributed under the terms of the GNU General # Public License ("GPL") version 2 as published by the Free Software # Foundation. - +from __future__ import print_function import os import sys from collections import defaultdict -from UserList import UserList +try: + from UserList import UserList +except ImportError: + # Python 3: UserList moved to the collections package + from collections import UserList sys.path.append(os.environ['PERF_EXEC_PATH'] + \ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') @@ -300,7 +304,7 @@ class TimeSliceList(UserList): if i == -1: return - for i in xrange(i, len(self.data)): + for i in range(i, len(self.data)): timeslice = self.data[i] if timeslice.start > end: return @@ -336,8 +340,8 @@ class SchedEventProxy: on_cpu_task = self.current_tsk[headers.cpu] if on_cpu_task != -1 and on_cpu_task != prev_pid: - print "Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \ - (headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid) + print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \ + headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid) threads[prev_pid] = prev_comm threads[next_pid] = next_comm diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 6c108fa79ae3..0b2b8305c965 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -21,6 +21,7 @@ perf-y += python-use.o perf-y += bp_signal.o perf-y += bp_signal_overflow.o perf-y += bp_account.o +perf-y += wp.o perf-y += task-exit.o perf-y += sw-clock.o perf-y += mmap-thread-lookup.o diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 37940665f736..efd0157b9d22 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -9,7 +9,7 @@ size=112 config=0 sample_period=* sample_type=263 -read_format=0 +read_format=0|4 disabled=1 inherit=1 pinned=0 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index 8a33ca4f9e1f..f0729c454f16 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -37,4 +37,3 @@ sample_freq=0 sample_period=0 freq=0 write_backward=0 -sample_id_all=0 diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 47bedf25ba69..96e7fc1ad3f9 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -16,8 +16,6 @@ static unsigned long *get_bitmap(const char *str, int nbits) bm = bitmap_alloc(nbits); if (map && bm) { - bitmap_zero(bm, nbits); - for (i = 0; i < map->nr; i++) set_bit(map->map[i], bm); } diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index cac8f8889bc3..12c09e0ece71 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -121,6 +121,16 @@ static struct test generic_tests[] = { .is_supported = test__bp_signal_is_supported, }, { + .desc = "Watchpoint", + .func = test__wp, + .is_supported = test__wp_is_supported, + .subtest = { + .skip_if_fail = false, + .get_nr = test__wp_subtest_get_nr, + .get_desc = test__wp_subtest_get_desc, + }, + }, + { .desc = "Number of exit events of a simple workload", .func = test__task_exit, }, @@ -385,7 +395,7 @@ static int test_and_print(struct test *t, bool force_skip, int subtest) if (!t->subtest.get_nr) pr_debug("%s:", t->desc); else - pr_debug("%s subtest %d:", t->desc, subtest); + pr_debug("%s subtest %d:", t->desc, subtest + 1); switch (err) { case TEST_OK: @@ -422,7 +432,7 @@ static const char *shell_test__description(char *description, size_t size, #define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (!is_directory(base, ent)) + if (!is_directory(base, ent) && ent->d_name[0] != '.') static const char *shell_tests__dir(char *path, size_t size) { @@ -654,6 +664,15 @@ static int perf_test__list(int argc, const char **argv) continue; pr_info("%2d: %s\n", i, t->desc); + + if (t->subtest.get_nr) { + int subn = t->subtest.get_nr(); + int subi; + + for (subi = 0; subi < subn; subi++) + pr_info("%2d:%1d: %s\n", i, subi + 1, + t->subtest.get_desc(subi)); + } } perf_test__list_shell(argc, argv, i); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 99936352df4f..6b049f3f5cf4 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -232,18 +232,18 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, u64 objdump_addr; const char *objdump_name; char decomp_name[KMOD_DECOMP_LEN]; + bool decomp = false; int ret; pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr); - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); - if (!al.map || !al.map->dso) { + if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) { if (cpumode == PERF_RECORD_MISC_HYPERVISOR) { pr_debug("Hypervisor address can not be resolved - skipping\n"); return 0; } - pr_debug("thread__find_addr_map failed\n"); + pr_debug("thread__find_map failed\n"); return -1; } @@ -306,6 +306,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, return -1; } + decomp = true; objdump_name = decomp_name; } @@ -313,7 +314,7 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, objdump_addr = map__rip_2objdump(al.map, al.addr); ret = read_via_objdump(objdump_name, objdump_addr, buf2, len); - if (dso__needs_decompress(al.map->dso)) + if (decomp) unlink(objdump_name); if (ret > 0) { @@ -561,6 +562,7 @@ static int do_test_code_reading(bool try_kcore) pid = getpid(); machine = machine__new_host(); + machine->env = &perf_env; ret = machine__create_kernel_maps(machine); if (ret < 0) { diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 699561fa512c..5f8501c68da4 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -8,7 +8,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, int size, bool should_be_signed) { - struct format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = perf_evsel__field(evsel, name); int is_signed; int ret = 0; @@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name, return -1; } - is_signed = !!(field->flags | FIELD_IS_SIGNED); + is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED); if (should_be_signed && !is_signed) { pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n", evsel->name, name, is_signed, should_be_signed); diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index f7c5b613d667..b889a28fd80b 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -131,20 +131,20 @@ struct machine *setup_fake_machine(struct machines *machines) goto out; /* emulate dso__load() */ - dso__set_loaded(dso, MAP__FUNCTION); + dso__set_loaded(dso); for (k = 0; k < fake_symbols[i].nr_syms; k++) { struct symbol *sym; struct fake_sym *fsym = &fake_symbols[i].syms[k]; sym = symbol__new(fsym->start, fsym->length, - STB_GLOBAL, fsym->name); + STB_GLOBAL, STT_FUNC, fsym->name); if (sym == NULL) { dso__put(dso); goto out; } - symbols__insert(&dso->symbols[MAP__FUNCTION], sym); + symbols__insert(&dso->symbols, sym); } dso__put(dso); diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c index 8e57d46109de..0579a70bbbff 100644 --- a/tools/perf/tests/kmod-path.c +++ b/tools/perf/tests/kmod-path.c @@ -5,34 +5,28 @@ #include "dso.h" #include "debug.h" -static int test(const char *path, bool alloc_name, bool alloc_ext, - bool kmod, bool comp, const char *name, const char *ext) +static int test(const char *path, bool alloc_name, bool kmod, + int comp, const char *name) { struct kmod_path m; memset(&m, 0x0, sizeof(m)); TEST_ASSERT_VAL("kmod_path__parse", - !__kmod_path__parse(&m, path, alloc_name, alloc_ext)); + !__kmod_path__parse(&m, path, alloc_name)); - pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n", - path, alloc_name, alloc_ext, m.kmod, m.comp, m.name, m.ext); + pr_debug("%s - alloc name %d, kmod %d, comp %d, name '%s'\n", + path, alloc_name, m.kmod, m.comp, m.name); TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod); TEST_ASSERT_VAL("wrong comp", m.comp == comp); - if (ext) - TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext)); - else - TEST_ASSERT_VAL("wrong ext", !m.ext); - if (name) TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name)); else TEST_ASSERT_VAL("wrong name", !m.name); free(m.name); - free(m.ext); return 0; } @@ -45,102 +39,118 @@ static int test_is_kernel_module(const char *path, int cpumode, bool expect) return 0; } -#define T(path, an, ae, k, c, n, e) \ - TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e)) +#define T(path, an, k, c, n) \ + TEST_ASSERT_VAL("failed", !test(path, an, k, c, n)) #define M(path, c, e) \ TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused) { - /* path alloc_name alloc_ext kmod comp name ext */ - T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); - T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL); - T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL); - T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL); + /* path alloc_name kmod comp name */ + T("/xxxx/xxxx/x-x.ko", true , true, 0 , "[x_x]"); + T("/xxxx/xxxx/x-x.ko", false , true, 0 , NULL ); + T("/xxxx/xxxx/x-x.ko", true , true, 0 , "[x_x]"); + T("/xxxx/xxxx/x-x.ko", false , true, 0 , NULL ); M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true); M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false); #ifdef HAVE_ZLIB_SUPPORT - /* path alloc_name alloc_ext kmod comp name ext */ - T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz"); - T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz"); - T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL); - T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL); + /* path alloc_name kmod comp name */ + T("/xxxx/xxxx/x.ko.gz", true , true, 1 , "[x]"); + T("/xxxx/xxxx/x.ko.gz", false , true, 1 , NULL ); + T("/xxxx/xxxx/x.ko.gz", true , true, 1 , "[x]"); + T("/xxxx/xxxx/x.ko.gz", false , true, 1 , NULL ); M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true); M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz"); - T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz"); - T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL); - T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL); + /* path alloc_name kmod comp name */ + T("/xxxx/xxxx/x.gz", true , false, 1 , "x.gz"); + T("/xxxx/xxxx/x.gz", false , false, 1 , NULL ); + T("/xxxx/xxxx/x.gz", true , false, 1 , "x.gz"); + T("/xxxx/xxxx/x.gz", false , false, 1 , NULL ); M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false); M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("x.gz", true , true , false, true, "x.gz", "gz"); - T("x.gz", false , true , false, true, NULL , "gz"); - T("x.gz", true , false , false, true, "x.gz", NULL); - T("x.gz", false , false , false, true, NULL , NULL); + /* path alloc_name kmod comp name */ + T("x.gz", true , false, 1 , "x.gz"); + T("x.gz", false , false, 1 , NULL ); + T("x.gz", true , false, 1 , "x.gz"); + T("x.gz", false , false, 1 , NULL ); M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); M("x.gz", PERF_RECORD_MISC_KERNEL, false); M("x.gz", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("x.ko.gz", true , true , true, true, "[x]", "gz"); - T("x.ko.gz", false , true , true, true, NULL , "gz"); - T("x.ko.gz", true , false , true, true, "[x]", NULL); - T("x.ko.gz", false , false , true, true, NULL , NULL); + /* path alloc_name kmod comp name */ + T("x.ko.gz", true , true, 1 , "[x]"); + T("x.ko.gz", false , true, 1 , NULL ); + T("x.ko.gz", true , true, 1 , "[x]"); + T("x.ko.gz", false , true, 1 , NULL ); M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true); M("x.ko.gz", PERF_RECORD_MISC_USER, false); #endif - /* path alloc_name alloc_ext kmod comp name ext */ - T("[test_module]", true , true , true, false, "[test_module]", NULL); - T("[test_module]", false , true , true, false, NULL , NULL); - T("[test_module]", true , false , true, false, "[test_module]", NULL); - T("[test_module]", false , false , true, false, NULL , NULL); + /* path alloc_name kmod comp name */ + T("[test_module]", true , true, false, "[test_module]"); + T("[test_module]", false , true, false, NULL ); + T("[test_module]", true , true, false, "[test_module]"); + T("[test_module]", false , true, false, NULL ); M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); M("[test_module]", PERF_RECORD_MISC_KERNEL, true); M("[test_module]", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("[test.module]", true , true , true, false, "[test.module]", NULL); - T("[test.module]", false , true , true, false, NULL , NULL); - T("[test.module]", true , false , true, false, "[test.module]", NULL); - T("[test.module]", false , false , true, false, NULL , NULL); + /* path alloc_name kmod comp name */ + T("[test.module]", true , true, false, "[test.module]"); + T("[test.module]", false , true, false, NULL ); + T("[test.module]", true , true, false, "[test.module]"); + T("[test.module]", false , true, false, NULL ); M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); M("[test.module]", PERF_RECORD_MISC_KERNEL, true); M("[test.module]", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("[vdso]", true , true , false, false, "[vdso]", NULL); - T("[vdso]", false , true , false, false, NULL , NULL); - T("[vdso]", true , false , false, false, "[vdso]", NULL); - T("[vdso]", false , false , false, false, NULL , NULL); + /* path alloc_name kmod comp name */ + T("[vdso]", true , false, false, "[vdso]"); + T("[vdso]", false , false, false, NULL ); + T("[vdso]", true , false, false, "[vdso]"); + T("[vdso]", false , false, false, NULL ); M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); M("[vdso]", PERF_RECORD_MISC_KERNEL, false); M("[vdso]", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL); - T("[vsyscall]", false , true , false, false, NULL , NULL); - T("[vsyscall]", true , false , false, false, "[vsyscall]", NULL); - T("[vsyscall]", false , false , false, false, NULL , NULL); + T("[vdso32]", true , false, false, "[vdso32]"); + T("[vdso32]", false , false, false, NULL ); + T("[vdso32]", true , false, false, "[vdso32]"); + T("[vdso32]", false , false, false, NULL ); + M("[vdso32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[vdso32]", PERF_RECORD_MISC_KERNEL, false); + M("[vdso32]", PERF_RECORD_MISC_USER, false); + + T("[vdsox32]", true , false, false, "[vdsox32]"); + T("[vdsox32]", false , false, false, NULL ); + T("[vdsox32]", true , false, false, "[vdsox32]"); + T("[vdsox32]", false , false, false, NULL ); + M("[vdsox32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[vdsox32]", PERF_RECORD_MISC_KERNEL, false); + M("[vdsox32]", PERF_RECORD_MISC_USER, false); + + /* path alloc_name kmod comp name */ + T("[vsyscall]", true , false, false, "[vsyscall]"); + T("[vsyscall]", false , false, false, NULL ); + T("[vsyscall]", true , false, false, "[vsyscall]"); + T("[vsyscall]", false , false, false, NULL ); M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false); M("[vsyscall]", PERF_RECORD_MISC_USER, false); - /* path alloc_name alloc_ext kmod comp name ext */ - T("[kernel.kallsyms]", true , true , false, false, "[kernel.kallsyms]", NULL); - T("[kernel.kallsyms]", false , true , false, false, NULL , NULL); - T("[kernel.kallsyms]", true , false , false, false, "[kernel.kallsyms]", NULL); - T("[kernel.kallsyms]", false , false , false, false, NULL , NULL); + /* path alloc_name kmod comp name */ + T("[kernel.kallsyms]", true , false, false, "[kernel.kallsyms]"); + T("[kernel.kallsyms]", false , false, false, NULL ); + T("[kernel.kallsyms]", true , false, false, "[kernel.kallsyms]"); + T("[kernel.kallsyms]", false , false, false, NULL ); M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false); M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false); diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index 0c3c87f86e03..9e9e4d37cc77 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -24,8 +24,6 @@ static unsigned long *get_bitmap(const char *str, int nbits) bm = bitmap_alloc(nbits); if (map && bm) { - bitmap_zero(bm, nbits); - for (i = 0; i < map->nr; i++) { set_bit(map->map[i], bm); } diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 868d82b501f4..b1af2499a3c9 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -188,9 +188,8 @@ static int mmap_events(synth_cb synth) pr_debug("looking for map %p\n", td->map); - thread__find_addr_map(thread, - PERF_RECORD_MISC_USER, MAP__FUNCTION, - (unsigned long) (td->map + 1), &al); + thread__find_map(thread, PERF_RECORD_MISC_USER, + (unsigned long) (td->map + 1), &al); thread__put(thread); @@ -218,7 +217,7 @@ static int mmap_events(synth_cb synth) * perf_event__synthesize_threads (global) * * We test we can find all memory maps via: - * thread__find_addr_map + * thread__find_map * * by using all thread objects. */ diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 18b06444f230..3b97ac018d5a 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -499,7 +499,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); - TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel)); TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); /* cpu/config=2,call-graph=no,time=0,period=2000/ */ @@ -512,7 +512,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlis * while this test executes only parse events method. */ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); - TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); + TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel)); TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); return 0; @@ -1309,18 +1309,39 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist) return 0; } +static bool test__intel_pt_valid(void) +{ + return !!perf_pmu__find("intel_pt"); +} + +static int test__intel_pt(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0); + return 0; +} + +static int test__checkevent_complex_name(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0); + return 0; +} + static int count_tracepoints(void) { struct dirent *events_ent; DIR *events_dir; int cnt = 0; - events_dir = opendir(tracing_events_path); + events_dir = tracing_events__opendir(); TEST_ASSERT_VAL("Can't open events dir", events_dir); while ((events_ent = readdir(events_dir))) { - char sys_path[PATH_MAX]; + char *sys_path; struct dirent *sys_ent; DIR *sys_dir; @@ -1331,8 +1352,8 @@ static int count_tracepoints(void) || !strcmp(events_ent->d_name, "header_page")) continue; - scnprintf(sys_path, PATH_MAX, "%s/%s", - tracing_events_path, events_ent->d_name); + sys_path = get_events_file(events_ent->d_name); + TEST_ASSERT_VAL("Can't get sys path", sys_path); sys_dir = opendir(sys_path); TEST_ASSERT_VAL("Can't open sys dir", sys_dir); @@ -1348,6 +1369,7 @@ static int count_tracepoints(void) } closedir(sys_dir); + put_events_file(sys_path); } closedir(events_dir); @@ -1366,6 +1388,7 @@ struct evlist_test { const char *name; __u32 type; const int id; + bool (*valid)(void); int (*check)(struct perf_evlist *evlist); }; @@ -1637,6 +1660,17 @@ static struct evlist_test test__events[] = { .check = test__checkevent_config_cache, .id = 51, }, + { + .name = "intel_pt//u", + .valid = test__intel_pt_valid, + .check = test__intel_pt, + .id = 52, + }, + { + .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", + .check = test__checkevent_complex_name, + .id = 53 + } }; static struct evlist_test test__events_pmu[] = { @@ -1655,6 +1689,11 @@ static struct evlist_test test__events_pmu[] = { .check = test__checkevent_pmu_partial_time_callgraph, .id = 2, }, + { + .name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp", + .check = test__checkevent_complex_name, + .id = 3, + } }; struct terms_test { @@ -1672,17 +1711,24 @@ static struct terms_test test__terms[] = { static int test_event(struct evlist_test *e) { + struct parse_events_error err = { .idx = 0, }; struct perf_evlist *evlist; int ret; + if (e->valid && !e->valid()) { + pr_debug("... SKIP"); + return 0; + } + evlist = perf_evlist__new(); if (evlist == NULL) return -ENOMEM; - ret = parse_events(evlist, e->name, NULL); + ret = parse_events(evlist, e->name, &err); if (ret) { - pr_debug("failed to parse event '%s', err %d\n", - e->name, ret); + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + e->name, ret, err.str); + parse_events_print_error(&err, e->name); } else { ret = e->check(evlist); } @@ -1700,10 +1746,11 @@ static int test_events(struct evlist_test *events, unsigned cnt) for (i = 0; i < cnt; i++) { struct evlist_test *e = &events[i]; - pr_debug("running test %d '%s'\n", e->id, e->name); + pr_debug("running test %d '%s'", e->id, e->name); ret1 = test_event(e); if (ret1) ret2 = ret1; + pr_debug("\n"); } return ret2; @@ -1785,7 +1832,7 @@ static int test_pmu_events(void) } while (!ret && (ent = readdir(dir))) { - struct evlist_test e; + struct evlist_test e = { .id = 0, }; char name[2 * NAME_MAX + 1 + 12 + 3]; /* Names containing . are special and cannot be used directly */ diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c index 5d2df65ada6a..40ab72149ce1 100644 --- a/tools/perf/tests/python-use.c +++ b/tools/perf/tests/python-use.c @@ -7,8 +7,7 @@ #include <stdlib.h> #include <linux/compiler.h> #include "tests.h" - -extern int verbose; +#include "util/debug.h" int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused) { diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index ee86473643be..cab7b0aea6ea 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -11,38 +11,62 @@ . $(dirname $0)/lib/probe.sh libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') -nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 +nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254 + +event_pattern='probe_libc:inet_pton(\_[[:digit:]]+)?' + +add_libc_inet_pton_event() { + + event_name=$(perf probe -f -x $libc -a inet_pton 2>&1 | tail -n +2 | head -n -5 | \ + grep -P -o "$event_pattern(?=[[:space:]]\(on inet_pton in $libc\))") + + if [ $? -ne 0 -o -z "$event_name" ] ; then + printf "FAIL: could not add event\n" + return 1 + fi +} trace_libc_inet_pton_backtrace() { - idx=0 - expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" - expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$" + + expected=`mktemp -u /tmp/expected.XXX` + + echo "ping[][0-9 \.:]+$event_name: \([[:xdigit:]]+\)" > $expected + echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' - expected[2]="gaih_inet.*[[:space:]]\($libc|inlined\)$" - expected[3]="(__GI_)?getaddrinfo[[:space:]]\($libc|inlined\)$" - expected[4]="main[[:space:]]\(.*/bin/ping.*\)$" + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected + ;; + ppc64|ppc64le) + eventattr='max-stack=4' + echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + echo ".*\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; *) eventattr='max-stack=3' - expected[2]="getaddrinfo[[:space:]]\($libc\)$" - expected[3]=".*\(.*/bin/ping.*\)$" + echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected + echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected ;; esac - file=`mktemp -u /tmp/perf.data.XXX` + perf_data=`mktemp -u /tmp/perf.data.XXX` + perf_script=`mktemp -u /tmp/perf.script.XXX` + perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1 + perf script -i $perf_data > $perf_script - perf record -e probe_libc:inet_pton/$eventattr/ -o $file ping -6 -c 1 ::1 > /dev/null 2>&1 - perf script -i $file | while read line ; do + exec 3<$perf_script + exec 4<$expected + while read line <&3 && read -r pattern <&4; do + [ -z "$pattern" ] && break echo $line - echo "$line" | egrep -q "${expected[$idx]}" + echo "$line" | egrep -q "$pattern" if [ $? -ne 0 ] ; then - printf "FAIL: expected backtrace entry %d \"%s\" got \"%s\"\n" $idx "${expected[$idx]}" "$line" - exit 1 + printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line" + return 1 fi - let idx+=1 - [ -z "${expected[$idx]}" ] && break done # If any statements are executed from this point onwards, @@ -51,13 +75,20 @@ trace_libc_inet_pton_backtrace() { # even if the perf script output does not match. } +delete_libc_inet_pton_event() { + + if [ -n "$event_name" ] ; then + perf probe -q -d $event_name + fi +} + # Check for IPv6 interface existence ip a sh lo | fgrep -q inet6 || exit 2 skip_if_no_perf_probe && \ -perf probe -q $libc inet_pton && \ +add_libc_inet_pton_event && \ trace_libc_inet_pton_backtrace err=$? -rm -f ${file} -perf probe -q -d probe_libc:inet_pton +rm -f ${perf_data} ${perf_script} ${expected} +delete_libc_inet_pton_event exit $err diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 55ad9793d544..4ce276efe6b4 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,7 +17,7 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') perf trace -e $evts touch $file 2>&1 | \ egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index a9760e790563..b82f55fcc294 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -59,6 +59,9 @@ int test__python_use(struct test *test, int subtest); int test__bp_signal(struct test *test, int subtest); int test__bp_signal_overflow(struct test *test, int subtest); int test__bp_accounting(struct test *test, int subtest); +int test__wp(struct test *test, int subtest); +const char *test__wp_subtest_get_desc(int subtest); +int test__wp_subtest_get_nr(void); int test__task_exit(struct test *test, int subtest); int test__mem(struct test *test, int subtest); int test__sw_clock_freq(struct test *test, int subtest); @@ -106,6 +109,7 @@ int test__unit_number__scnprint(struct test *test, int subtest); int test__mem2node(struct test *t, int subtest); bool test__bp_signal_is_supported(void); +bool test__wp_is_supported(void); #if defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 40e30a26b23c..9497d02f69e6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -45,6 +45,7 @@ static int session_write_header(char *path) perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); + perf_header__set_feat(&session->header, HEADER_ARCH); session->header.data_size += DATA_SIZE; diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 1e5adb65632a..7691980b7df1 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -19,8 +19,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest struct symbol *sym; struct map *kallsyms_map, *vmlinux_map, *map; struct machine kallsyms, vmlinux; - enum map_type type = MAP__FUNCTION; - struct maps *maps = &vmlinux.kmaps.maps[type]; + struct maps *maps = machine__kernel_maps(&vmlinux); u64 mem_start, mem_end; bool header_printed; @@ -56,7 +55,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * be compacted against the list of modules found in the "vmlinux" * code and with the one got from /proc/modules from the "kallsyms" code. */ - if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) { + if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) { pr_debug("dso__load_kallsyms "); goto out; } @@ -94,7 +93,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines * to fixup the symbols. */ - if (machine__load_vmlinux_path(&vmlinux, type) <= 0) { + if (machine__load_vmlinux_path(&vmlinux) <= 0) { pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n"); err = TEST_SKIP; goto out; @@ -108,7 +107,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest * in the kallsyms dso. For the ones that are in both, check its names and * end addresses too. */ - for (nd = rb_first(&vmlinux_map->dso->symbols[type]); nd; nd = rb_next(nd)) { + map__for_each_symbol(vmlinux_map, sym, nd) { struct symbol *pair, *first_pair; sym = rb_entry(nd, struct symbol, rb_node); @@ -119,8 +118,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start); mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end); - first_pair = machine__find_kernel_symbol(&kallsyms, type, - mem_start, NULL); + first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL); pair = first_pair; if (pair && UM(pair->start) == mem_start) { @@ -149,7 +147,7 @@ next_pair: */ continue; } else { - pair = machine__find_kernel_symbol_by_name(&kallsyms, type, sym->name, NULL); + pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL); if (pair) { if (UM(pair->start) == mem_start) goto next_pair; @@ -183,7 +181,7 @@ next_pair: * so use the short name, less descriptive but the same ("[kernel]" in * both cases. */ - pair = map_groups__find_by_name(&kallsyms.kmaps, type, + pair = map_groups__find_by_name(&kallsyms.kmaps, (map->dso->kernel ? map->dso->short_name : map->dso->name)); @@ -206,7 +204,7 @@ next_pair: mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start); mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end); - pair = map_groups__find(&kallsyms.kmaps, type, mem_start); + pair = map_groups__find(&kallsyms.kmaps, mem_start); if (pair == NULL || pair->priv) continue; @@ -228,7 +226,7 @@ next_pair: header_printed = false; - maps = &kallsyms.kmaps.maps[type]; + maps = machine__kernel_maps(&kallsyms); for (map = maps__first(maps); map; map = map__next(map)) { if (!map->priv) { diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c new file mode 100644 index 000000000000..f89e6806557b --- /dev/null +++ b/tools/perf/tests/wp.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdlib.h> +#include <sys/ioctl.h> +#include <linux/hw_breakpoint.h> +#include "tests.h" +#include "debug.h" +#include "cloexec.h" + +#define WP_TEST_ASSERT_VAL(fd, text, val) \ +do { \ + long long count; \ + wp_read(fd, &count, sizeof(long long)); \ + TEST_ASSERT_VAL(text, count == val); \ +} while (0) + +volatile u64 data1; +volatile u8 data2[3]; + +static int wp_read(int fd, long long *count, int size) +{ + int ret = read(fd, count, size); + + if (ret != size) { + pr_debug("failed to read: %d\n", ret); + return -1; + } + return 0; +} + +static void get__perf_event_attr(struct perf_event_attr *attr, int wp_type, + void *wp_addr, unsigned long wp_len) +{ + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->config = 0; + attr->bp_type = wp_type; + attr->bp_addr = (unsigned long)wp_addr; + attr->bp_len = wp_len; + attr->sample_period = 1; + attr->sample_type = PERF_SAMPLE_IP; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; +} + +static int __event(int wp_type, void *wp_addr, unsigned long wp_len) +{ + int fd; + struct perf_event_attr attr; + + get__perf_event_attr(&attr, wp_type, wp_addr, wp_len); + fd = sys_perf_event_open(&attr, 0, -1, -1, + perf_event_open_cloexec_flag()); + if (fd < 0) + pr_debug("failed opening event %x\n", attr.bp_type); + + return fd; +} + +static int wp_ro_test(void) +{ + int fd; + unsigned long tmp, tmp1 = rand(); + + fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1)); + if (fd < 0) + return -1; + + tmp = data1; + WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1); + + data1 = tmp1 + tmp; + WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1); + + close(fd); + return 0; +} + +static int wp_wo_test(void) +{ + int fd; + unsigned long tmp, tmp1 = rand(); + + fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); + if (fd < 0) + return -1; + + tmp = data1; + WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0); + + data1 = tmp1 + tmp; + WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 1); + + close(fd); + return 0; +} + +static int wp_rw_test(void) +{ + int fd; + unsigned long tmp, tmp1 = rand(); + + fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1, + sizeof(data1)); + if (fd < 0) + return -1; + + tmp = data1; + WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1); + + data1 = tmp1 + tmp; + WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 2); + + close(fd); + return 0; +} + +static int wp_modify_test(void) +{ + int fd, ret; + unsigned long tmp = rand(); + struct perf_event_attr new_attr; + + fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1)); + if (fd < 0) + return -1; + + data1 = tmp; + WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1); + + /* Modify watchpoint with disabled = 1 */ + get__perf_event_attr(&new_attr, HW_BREAKPOINT_W, (void *)&data2[0], + sizeof(u8) * 2); + new_attr.disabled = 1; + ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr); + if (ret < 0) { + pr_debug("ioctl(PERF_EVENT_IOC_MODIFY_ATTRIBUTES) failed\n"); + close(fd); + return ret; + } + + data2[1] = tmp; /* Not Counted */ + WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1); + + /* Enable the event */ + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); + if (ret < 0) { + pr_debug("Failed to enable event\n"); + close(fd); + return ret; + } + + data2[1] = tmp; /* Counted */ + WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 2); + + data2[2] = tmp; /* Not Counted */ + WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 2); + + close(fd); + return 0; +} + +static bool wp_ro_supported(void) +{ +#if defined (__x86_64__) || defined (__i386__) + return false; +#else + return true; +#endif +} + +static void wp_ro_skip_msg(void) +{ +#if defined (__x86_64__) || defined (__i386__) + pr_debug("Hardware does not support read only watchpoints.\n"); +#endif +} + +static struct { + const char *desc; + int (*target_func)(void); + bool (*is_supported)(void); + void (*skip_msg)(void); +} wp_testcase_table[] = { + { + .desc = "Read Only Watchpoint", + .target_func = &wp_ro_test, + .is_supported = &wp_ro_supported, + .skip_msg = &wp_ro_skip_msg, + }, + { + .desc = "Write Only Watchpoint", + .target_func = &wp_wo_test, + }, + { + .desc = "Read / Write Watchpoint", + .target_func = &wp_rw_test, + }, + { + .desc = "Modify Watchpoint", + .target_func = &wp_modify_test, + }, +}; + +int test__wp_subtest_get_nr(void) +{ + return (int)ARRAY_SIZE(wp_testcase_table); +} + +const char *test__wp_subtest_get_desc(int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return NULL; + return wp_testcase_table[i].desc; +} + +int test__wp(struct test *test __maybe_unused, int i) +{ + if (i < 0 || i >= (int)ARRAY_SIZE(wp_testcase_table)) + return TEST_FAIL; + + if (wp_testcase_table[i].is_supported && + !wp_testcase_table[i].is_supported()) { + wp_testcase_table[i].skip_msg(); + return TEST_SKIP; + } + + return !wp_testcase_table[i].target_func() ? TEST_OK : TEST_FAIL; +} + +/* The s390 so far does not have support for + * instruction breakpoint using the perf_event_open() system call. + */ +bool test__wp_is_supported(void) +{ +#if defined(__s390x__) + return false; +#else + return true; +#endif +} diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 66330d4b739b..304313073242 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -5,6 +5,9 @@ ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif libperf-y += kcmp.o +libperf-y += mount_flags.o libperf-y += pkey_alloc.o libperf-y += prctl.o +libperf-y += sockaddr.o +libperf-y += socket.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 984a504d335c..039c29039b2c 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -24,15 +24,43 @@ struct strarray { } size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val); +size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags); struct trace; struct thread; size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size); +extern struct strarray strarray__socket_families; + +/** + * augmented_arg: extra payload for syscall pointer arguments + + * If perf_sample->raw_size is more than what a syscall sys_enter_FOO puts, + * then its the arguments contents, so that we can show more than just a + * pointer. This will be done initially with eBPF, the start of that is at the + * tools/perf/examples/bpf/augmented_syscalls.c example for the openat, but + * will eventually be done automagically caching the running kernel tracefs + * events data into an eBPF C script, that then gets compiled and its .o file + * cached for subsequent use. For char pointers like the ones for 'open' like + * syscalls its easy, for the rest we should use DWARF or better, BTF, much + * more compact. + * + * @size: 8 if all we need is an integer, otherwise all of the augmented arg. + * @int_arg: will be used for integer like pointer contents, like 'accept's 'upeer_addrlen' + * @value: u64 aligned, for structs, pathnames + */ +struct augmented_arg { + int size; + int int_arg; + u64 value[]; +}; + /** * @val: value of syscall argument being formatted * @args: All the args, use syscall_args__val(arg, nth) to access one + * @augmented_args: Extra data that can be collected, for instance, with eBPF for expanding the pathname for open, etc + * @augmented_args_size: augmented_args total payload size * @thread: tid state (maps, pid, tid, etc) * @trace: 'perf trace' internals: all threads, etc * @parm: private area, may be an strarray, for instance @@ -43,6 +71,10 @@ size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_ struct syscall_arg { unsigned long val; unsigned char *args; + struct { + struct augmented_arg *args; + int size; + } augmented; struct thread *thread; struct trace *trace; void *parm; @@ -91,6 +123,12 @@ size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg); #define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx +unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigned long flags); +#define SCAMV_MOUNT_FLAGS syscall_arg__mask_val_mount_flags + +size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights @@ -106,6 +144,12 @@ size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_a size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg); #define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3 +size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_SOCKADDR syscall_arg__scnprintf_sockaddr + +size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol + size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c index d64d049ab991..010406500c30 100644 --- a/tools/perf/trace/beauty/clone.c +++ b/tools/perf/trace/beauty/clone.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/cone.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh index 2149d3a98e42..9aa94fd523a9 100755 --- a/tools/perf/trace/beauty/drm_ioctl.sh +++ b/tools/perf/trace/beauty/drm_ioctl.sh @@ -1,13 +1,15 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/ -drm_header_dir=$1 printf "#ifndef DRM_COMMAND_BASE\n" -grep "#define DRM_COMMAND_BASE" $drm_header_dir/drm.h +grep "#define DRM_COMMAND_BASE" $header_dir/drm.h printf "#endif\n" printf "static const char *drm_ioctl_cmds[] = {\n" -grep "^#define DRM_IOCTL.*DRM_IO" $drm_header_dir/drm.h | \ +grep "^#define DRM_IOCTL.*DRM_IO" $header_dir/drm.h | \ sed -r 's/^#define +DRM_IOCTL_([A-Z0-9_]+)[ ]+DRM_IO[A-Z]* *\( *(0x[[:xdigit:]]+),*.*/ [\2] = "\1",/g' -grep "^#define DRM_I915_[A-Z_0-9]\+[ ]\+0x" $drm_header_dir/i915_drm.h | \ +grep "^#define DRM_I915_[A-Z_0-9]\+[ ]\+0x" $header_dir/i915_drm.h | \ sed -r 's/^#define +DRM_I915_([A-Z0-9_]+)[ ]+(0x[[:xdigit:]]+)/\t[DRM_COMMAND_BASE + \2] = "I915_\1",/g' printf "};\n" diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c index 5d6a477a6400..db5b9b492113 100644 --- a/tools/perf/trace/beauty/eventfd.c +++ b/tools/perf/trace/beauty/eventfd.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef EFD_SEMAPHORE #define EFD_SEMAPHORE 1 #endif diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c index 9e8900c13cb1..e6de31674e24 100644 --- a/tools/perf/trace/beauty/fcntl.c +++ b/tools/perf/trace/beauty/fcntl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/fcntl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c index c4ff6ad30b06..cf02ae5f0ba6 100644 --- a/tools/perf/trace/beauty/flock.c +++ b/tools/perf/trace/beauty/flock.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include "trace/beauty/beauty.h" #include <linux/kernel.h> diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c index 61850fbc85ff..1136bde56406 100644 --- a/tools/perf/trace/beauty/futex_op.c +++ b/tools/perf/trace/beauty/futex_op.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <linux/futex.h> #ifndef FUTEX_WAIT_BITSET diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c index 26f6b3253511..138b7d588a70 100644 --- a/tools/perf/trace/beauty/futex_val3.c +++ b/tools/perf/trace/beauty/futex_val3.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <linux/futex.h> #ifndef FUTEX_BITSET_MATCH_ANY diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 1be3b4cf0827..eae59ad15ce3 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/ioctl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" @@ -32,6 +31,7 @@ static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size) "TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK", "TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER", + "TIOCGISO7816", "TIOCSISO7816", [_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG", "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS", "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI", diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c index f62040eb9d5c..b276a274f203 100644 --- a/tools/perf/trace/beauty/kcmp.c +++ b/tools/perf/trace/beauty/kcmp.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/kcmp.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh index 40d063b8c082..df8b17486d57 100755 --- a/tools/perf/trace/beauty/kcmp_type.sh +++ b/tools/perf/trace/beauty/kcmp_type.sh @@ -1,6 +1,7 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *kcmp_types[] = {\n" regex='^[[:space:]]+(KCMP_(\w+)),' diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh index bd28817afced..4ce54f5bf756 100755 --- a/tools/perf/trace/beauty/kvm_ioctl.sh +++ b/tools/perf/trace/beauty/kvm_ioctl.sh @@ -1,10 +1,11 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -kvm_header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *kvm_ioctl_cmds[] = {\n" regex='^#[[:space:]]*define[[:space:]]+KVM_(\w+)[[:space:]]+_IO[RW]*\([[:space:]]*KVMIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*' -egrep $regex ${kvm_header_dir}/kvm.h | \ +egrep $regex ${header_dir}/kvm.h | \ sed -r "s/$regex/\2 \1/g" | \ egrep -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \ sort | xargs printf "\t[%s] = \"%s\",\n" diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh index 60ef8640ee70..4527d290cdfc 100755 --- a/tools/perf/trace/beauty/madvise_behavior.sh +++ b/tools/perf/trace/beauty/madvise_behavior.sh @@ -1,6 +1,7 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/ printf "static const char *madvise_advices[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*' diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c index 9f68077b241b..c534bd96ef5c 100644 --- a/tools/perf/trace/beauty/mmap.c +++ b/tools/perf/trace/beauty/mmap.c @@ -1,5 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <uapi/linux/mman.h> +#include <linux/log2.h> static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) @@ -30,50 +31,23 @@ static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot +static size_t mmap__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ +#include "trace/beauty/generated/mmap_flags_array.c" + static DEFINE_STRARRAY(mmap_flags); + + return strarray__scnprintf_flags(&strarray__mmap_flags, bf, size, flags); +} + static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size, struct syscall_arg *arg) { - int printed = 0, flags = arg->val; + unsigned long flags = arg->val; if (flags & MAP_ANONYMOUS) arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */ -#define P_MMAP_FLAG(n) \ - if (flags & MAP_##n) { \ - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ - flags &= ~MAP_##n; \ - } - - P_MMAP_FLAG(SHARED); - P_MMAP_FLAG(PRIVATE); -#ifdef MAP_32BIT - P_MMAP_FLAG(32BIT); -#endif - P_MMAP_FLAG(ANONYMOUS); - P_MMAP_FLAG(DENYWRITE); - P_MMAP_FLAG(EXECUTABLE); - P_MMAP_FLAG(FILE); - P_MMAP_FLAG(FIXED); -#ifdef MAP_FIXED_NOREPLACE - P_MMAP_FLAG(FIXED_NOREPLACE); -#endif - P_MMAP_FLAG(GROWSDOWN); - P_MMAP_FLAG(HUGETLB); - P_MMAP_FLAG(LOCKED); - P_MMAP_FLAG(NONBLOCK); - P_MMAP_FLAG(NORESERVE); - P_MMAP_FLAG(POPULATE); - P_MMAP_FLAG(STACK); - P_MMAP_FLAG(UNINITIALIZED); -#ifdef MAP_SYNC - P_MMAP_FLAG(SYNC); -#endif -#undef P_MMAP_FLAG - - if (flags) - printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags); - - return printed; + return mmap__scnprintf_flags(flags, bf, size); } #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh new file mode 100755 index 000000000000..22c3fdca8975 --- /dev/null +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +if [ $# -ne 2 ] ; then + [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + header_dir=tools/include/uapi/asm-generic + arch_header_dir=tools/arch/${hostarch}/include/uapi/asm +else + header_dir=$1 + arch_header_dir=$2 +fi + +arch_mman=${arch_header_dir}/mman.h + +# those in egrep -vw are flags, we want just the bits + +printf "static const char *mmap_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +egrep -q $regex ${arch_mman} && \ +(egrep $regex ${arch_mman} | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.*' ${arch_mman} && +(egrep $regex ${header_dir}/mman-common.h | \ + egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q '#[[:space:]]*include[[:space:]]+<uapi/asm-generic/mman.h>.*' ${arch_mman} && +(egrep $regex ${header_dir}/mman.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +printf "};\n" diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c index d929ad7dd97b..6879d36d3004 100644 --- a/tools/perf/trace/beauty/mode_t.c +++ b/tools/perf/trace/beauty/mode_t.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> diff --git a/tools/perf/trace/beauty/mount_flags.c b/tools/perf/trace/beauty/mount_flags.c new file mode 100644 index 000000000000..712935c6620a --- /dev/null +++ b/tools/perf/trace/beauty/mount_flags.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/mount_flags.c + * + * Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <sys/mount.h> + +static size_t mount__scnprintf_flags(unsigned long flags, char *bf, size_t size) +{ +#include "trace/beauty/generated/mount_flags_array.c" + static DEFINE_STRARRAY(mount_flags); + + return strarray__scnprintf_flags(&strarray__mount_flags, bf, size, flags); +} + +unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg __maybe_unused, unsigned long flags) +{ + // do_mount in fs/namespace.c: + /* + * Pre-0.97 versions of mount() didn't have a flags word. When the + * flags word was introduced its top half was required to have the + * magic value 0xC0ED, and this remained so until 2.4.0-test9. + * Therefore, if this magic number is present, it carries no + * information and must be discarded. + */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; + + return flags; +} + +size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long flags = arg->val; + + return mount__scnprintf_flags(flags, bf, size); +} diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh new file mode 100755 index 000000000000..45547573a1db --- /dev/null +++ b/tools/perf/trace/beauty/mount_flags.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ + +printf "static const char *mount_flags[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*' +egrep $regex ${header_dir}/fs.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \ + sed -r "s/$regex/\2 \2 \1/g" | sort -n | \ + xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*' +egrep $regex ${header_dir}/fs.h | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[%s + 1] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c index c064d6aae659..1b9d6306d274 100644 --- a/tools/perf/trace/beauty/msg_flags.c +++ b/tools/perf/trace/beauty/msg_flags.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/socket.h> diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c index 6aec6178a99d..cc673fec9184 100644 --- a/tools/perf/trace/beauty/open_flags.c +++ b/tools/perf/trace/beauty/open_flags.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c index 2bafd7c995ff..981185c1974b 100644 --- a/tools/perf/trace/beauty/perf_event_open.c +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef PERF_FLAG_FD_NO_GROUP # define PERF_FLAG_FD_NO_GROUP (1UL << 0) #endif diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh index faea4237c793..9aabd9743ef6 100755 --- a/tools/perf/trace/beauty/perf_ioctl.sh +++ b/tools/perf/trace/beauty/perf_ioctl.sh @@ -1,6 +1,7 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *perf_ioctl_cmds[] = {\n" regex='^#[[:space:]]*define[[:space:]]+PERF_EVENT_IOC_(\w+)[[:space:]]+_IO[RW]*[[:space:]]*\([[:space:]]*.\$.[[:space:]]*,[[:space:]]*([[:digit:]]+).*' diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c index 0313df342830..1a6acc46807b 100644 --- a/tools/perf/trace/beauty/pid.c +++ b/tools/perf/trace/beauty/pid.c @@ -1,4 +1,5 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 + size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg) { int pid = arg->val; diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c index 2ba784a3734a..1b8ed4cac815 100644 --- a/tools/perf/trace/beauty/pkey_alloc.c +++ b/tools/perf/trace/beauty/pkey_alloc.c @@ -1,40 +1,36 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/pkey_alloc.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" #include <linux/kernel.h> #include <linux/log2.h> -static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +size_t strarray__scnprintf_flags(struct strarray *sa, char *bf, size_t size, unsigned long flags) { int i, printed = 0; -#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" - static DEFINE_STRARRAY(pkey_alloc_access_rights); - - if (access_rights == 0) { - const char *s = strarray__pkey_alloc_access_rights.entries[0]; + if (flags == 0) { + const char *s = sa->entries[0]; if (s) return scnprintf(bf, size, "%s", s); return scnprintf(bf, size, "%d", 0); } - for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) { - int bit = 1 << (i - 1); + for (i = 1; i < sa->nr_entries; ++i) { + unsigned long bit = 1UL << (i - 1); - if (!(access_rights & bit)) + if (!(flags & bit)) continue; if (printed != 0) printed += scnprintf(bf + printed, size - printed, "|"); - if (strarray__pkey_alloc_access_rights.entries[i] != NULL) - printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]); + if (sa->entries[i] != NULL) + printed += scnprintf(bf + printed, size - printed, "%s", sa->entries[i]); else printed += scnprintf(bf + printed, size - printed, "0x%#", bit); } @@ -42,6 +38,14 @@ static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, s return printed; } +static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +{ +#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" + static DEFINE_STRARRAY(pkey_alloc_access_rights); + + return strarray__scnprintf_flags(&strarray__pkey_alloc_access_rights, bf, size, access_rights); +} + size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg) { unsigned long cmd = arg->val; diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh index 62e51a02b839..f8f1b560cf8a 100755 --- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh +++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -1,6 +1,7 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/ printf "static const char *pkey_alloc_access_rights[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*' diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c index 246130dad6c4..be7a5d395975 100644 --- a/tools/perf/trace/beauty/prctl.c +++ b/tools/perf/trace/beauty/prctl.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/prctl.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 0be4138fbe71..d32f8f1124af 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -1,6 +1,7 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*' diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c index ba5096ae76b6..48f2b5c9aa3e 100644 --- a/tools/perf/trace/beauty/sched_policy.c +++ b/tools/perf/trace/beauty/sched_policy.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sched.h> /* diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c index b7097fd5fed9..e36156b19c70 100644 --- a/tools/perf/trace/beauty/seccomp.c +++ b/tools/perf/trace/beauty/seccomp.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #ifndef SECCOMP_SET_MODE_STRICT #define SECCOMP_SET_MODE_STRICT 0 #endif diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c index bde18a53f090..587fec545b8a 100644 --- a/tools/perf/trace/beauty/signum.c +++ b/tools/perf/trace/beauty/signum.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <signal.h> static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg) diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh index aad5ab130539..e0803b957593 100755 --- a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh +++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -1,8 +1,9 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -sound_header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/ printf "static const char *sndrv_ctl_ioctl_cmds[] = {\n" -grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $sound_header_dir/asound.h | \ +grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $header_dir/asound.h | \ sed -r 's/^#define +SNDRV_CTL_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.U., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g' printf "};\n" diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh index b7e9ef6b2f55..7a464a7bf913 100755 --- a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh +++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh @@ -1,8 +1,9 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -sound_header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/ printf "static const char *sndrv_pcm_ioctl_cmds[] = {\n" -grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $sound_header_dir/asound.h | \ +grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $header_dir/asound.h | \ sed -r 's/^#define +SNDRV_PCM_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.A., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g' printf "};\n" diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c new file mode 100644 index 000000000000..9410ad230f10 --- /dev/null +++ b/tools/perf/trace/beauty/sockaddr.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + +#include "trace/beauty/beauty.h" +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/un.h> +#include <arpa/inet.h> + +static const char *socket_families[] = { + "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM", + "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI", + "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC", + "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC", + "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF", + "ALG", "NFC", "VSOCK", +}; +DEFINE_STRARRAY(socket_families); + +static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sa; + char tmp[16]; + return scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin->sin_port), + inet_ntop(sin->sin_family, &sin->sin_addr, tmp, sizeof(tmp))); +} + +static size_t af_inet6__scnprintf(struct sockaddr *sa, char *bf, size_t size) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; + u32 flowinfo = ntohl(sin6->sin6_flowinfo); + char tmp[512]; + size_t printed = scnprintf(bf, size, ", port: %d, addr: %s", ntohs(sin6->sin6_port), + inet_ntop(sin6->sin6_family, &sin6->sin6_addr, tmp, sizeof(tmp))); + if (flowinfo != 0) + printed += scnprintf(bf + printed, size - printed, ", flowinfo: %lu", flowinfo); + if (sin6->sin6_scope_id != 0) + printed += scnprintf(bf + printed, size - printed, ", scope_id: %lu", sin6->sin6_scope_id); + + return printed; +} + +static size_t af_local__scnprintf(struct sockaddr *sa, char *bf, size_t size) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)sa; + return scnprintf(bf, size, ", path: %s", sun->sun_path); +} + +static size_t (*af_scnprintfs[])(struct sockaddr *sa, char *bf, size_t size) = { + [AF_LOCAL] = af_local__scnprintf, + [AF_INET] = af_inet__scnprintf, + [AF_INET6] = af_inet6__scnprintf, +}; + +static size_t syscall_arg__scnprintf_augmented_sockaddr(struct syscall_arg *arg, char *bf, size_t size) +{ + struct sockaddr *sa = (struct sockaddr *)arg->augmented.args; + char family[32]; + size_t printed; + + strarray__scnprintf(&strarray__socket_families, family, sizeof(family), "%d", sa->sa_family); + printed = scnprintf(bf, size, "{ .family: %s", family); + + if (sa->sa_family < ARRAY_SIZE(af_scnprintfs) && af_scnprintfs[sa->sa_family]) + printed += af_scnprintfs[sa->sa_family](sa, bf + printed, size - printed); + + return printed + scnprintf(bf + printed, size - printed, " }"); +} + +size_t syscall_arg__scnprintf_sockaddr(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_sockaddr(arg, bf, size); + + return scnprintf(bf, size, "%#x", arg->val); +} diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c new file mode 100644 index 000000000000..d971a2596417 --- /dev/null +++ b/tools/perf/trace/beauty/socket.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: LGPL-2.1 +/* + * trace/beauty/socket.c + * + * Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> + */ + +#include "trace/beauty/beauty.h" +#include <sys/types.h> +#include <sys/socket.h> + +static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size) +{ +#include "trace/beauty/generated/socket_ipproto_array.c" + static DEFINE_STRARRAY(socket_ipproto); + + return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", protocol); +} + +size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg) +{ + int domain = syscall_arg__val(arg, 0); + + if (domain == AF_INET || domain == AF_INET6) + return socket__scnprintf_ipproto(arg->val, bf, size); + + return syscall_arg__scnprintf_int(bf, size, arg); +} diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh new file mode 100755 index 000000000000..de0f2f29017f --- /dev/null +++ b/tools/perf/trace/beauty/socket_ipproto.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 + +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ + +printf "static const char *socket_ipproto[] = {\n" +regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*' + +egrep $regex ${header_dir}/in.h | \ + sed -r "s/$regex/\2 \1/g" | \ + sort | xargs printf "\t[%s] = \"%s\",\n" +printf "};\n" diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c index bca26aef4a77..a63a9a332aa0 100644 --- a/tools/perf/trace/beauty/socket_type.c +++ b/tools/perf/trace/beauty/socket_type.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/socket.h> diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c index 5643b692af4c..630f2760dd66 100644 --- a/tools/perf/trace/beauty/statx.c +++ b/tools/perf/trace/beauty/statx.c @@ -1,9 +1,8 @@ +// SPDX-License-Identifier: LGPL-2.1 /* * trace/beauty/statx.c * * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> - * - * Released under the GPL v2. (and only v2, not any later version) */ #include "trace/beauty/beauty.h" diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh index 76f1de697787..439773daaf77 100755 --- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh +++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh @@ -1,17 +1,18 @@ #!/bin/sh +# SPDX-License-Identifier: LGPL-2.1 -vhost_virtio_header_dir=$1 +[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *vhost_virtio_ioctl_cmds[] = {\n" regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*' -egrep $regex ${vhost_virtio_header_dir}/vhost.h | \ +egrep $regex ${header_dir}/vhost.h | \ sed -r "s/$regex/\2 \1/g" | \ sort | xargs printf "\t[%s] = \"%s\",\n" printf "};\n" printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n" regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*' -egrep $regex ${vhost_virtio_header_dir}/vhost.h | \ +egrep $regex ${header_dir}/vhost.h | \ sed -r "s/$regex/\2 \1/g" | \ sort | xargs printf "\t[%s] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c index 8465281a093d..42ff58ad613b 100644 --- a/tools/perf/trace/beauty/waitid_options.c +++ b/tools/perf/trace/beauty/waitid_options.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: LGPL-2.1 #include <sys/types.h> #include <sys/wait.h> diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3781d74088a7..1d00e5ec7906 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <sys/ttydefaults.h> +#include <asm/bug.h> struct disasm_line_samples { double percent; @@ -29,6 +30,7 @@ struct annotate_browser { struct rb_node *curr_hot; struct annotation_line *selection; struct arch *arch; + struct annotation_options *opts; bool searching_backwards; char search_bf[128]; }; @@ -114,7 +116,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) ops.width += 1; - annotation_line__write(al, notes, &ops); + annotation_line__write(al, notes, &ops, ab->opts); if (ops.current_entry) ab->selection = al; @@ -226,10 +228,10 @@ static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->samples_nr; i++) { - if (a->samples[i].percent == b->samples[i].percent) + for (i = 0; i < a->data_nr; i++) { + if (a->data[i].percent == b->data[i].percent) continue; - return a->samples[i].percent < b->samples[i].percent; + return a->data[i].percent < b->data[i].percent; } return 0; } @@ -313,11 +315,14 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - for (i = 0; i < pos->al.samples_nr; i++) { - struct annotation_data *sample = &pos->al.samples[i]; + for (i = 0; i < pos->al.data_nr; i++) { + double percent; - if (max_percent < sample->percent) - max_percent = sample->percent; + percent = annotation_data__percent(&pos->al.data[i], + browser->opts->percent_type); + + if (max_percent < percent) + max_percent = percent; } if (max_percent < 0.01 && pos->al.ipc == 0) { @@ -379,9 +384,10 @@ static void ui_browser__init_asm_mode(struct ui_browser *browser) #define SYM_TITLE_MAX_SIZE (PATH_MAX + 64) static int sym_title(struct symbol *sym, struct map *map, char *title, - size_t sz) + size_t sz, int percent_type) { - return snprintf(title, sz, "%s %s", sym->name, map->dso->long_name); + return snprintf(title, sz, "%s %s [Percent: %s]", sym->name, map->dso->long_name, + percent_type_str(percent_type)); } /* @@ -410,7 +416,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, notes = symbol__annotation(dl->ops.target.sym); pthread_mutex_lock(¬es->lock); - if (notes->src == NULL && symbol__alloc_hist(dl->ops.target.sym) < 0) { + if (!symbol__hists(dl->ops.target.sym, evsel->evlist->nr_entries)) { pthread_mutex_unlock(¬es->lock); ui__warning("Not enough memory for annotating '%s' symbol!\n", dl->ops.target.sym->name); @@ -418,8 +424,8 @@ static bool annotate_browser__callq(struct annotate_browser *browser, } pthread_mutex_unlock(¬es->lock); - symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt); - sym_title(ms->sym, ms->map, title, sizeof(title)); + symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt, browser->opts); + sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type); ui_browser__show_title(&browser->b, title); return true; } @@ -594,6 +600,7 @@ bool annotate_browser__continue_search_reverse(struct annotate_browser *browser, static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help) { + struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct map_symbol *ms = browser->priv; struct symbol *sym = ms->sym; char symbol_dso[SYM_TITLE_MAX_SIZE]; @@ -601,7 +608,7 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const if (ui_browser__show(browser, title, help) < 0) return -1; - sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso)); + sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type); ui_browser__gotorc_title(browser, 0, 0); ui_browser__set_color(browser, HE_COLORSET_ROOT); @@ -609,6 +616,39 @@ static int annotate_browser__show(struct ui_browser *browser, char *title, const return 0; } +static void +switch_percent_type(struct annotation_options *opts, bool base) +{ + switch (opts->percent_type) { + case PERCENT_HITS_LOCAL: + if (base) + opts->percent_type = PERCENT_PERIOD_LOCAL; + else + opts->percent_type = PERCENT_HITS_GLOBAL; + break; + case PERCENT_HITS_GLOBAL: + if (base) + opts->percent_type = PERCENT_PERIOD_GLOBAL; + else + opts->percent_type = PERCENT_HITS_LOCAL; + break; + case PERCENT_PERIOD_LOCAL: + if (base) + opts->percent_type = PERCENT_HITS_LOCAL; + else + opts->percent_type = PERCENT_PERIOD_GLOBAL; + break; + case PERCENT_PERIOD_GLOBAL: + if (base) + opts->percent_type = PERCENT_HITS_GLOBAL; + else + opts->percent_type = PERCENT_PERIOD_LOCAL; + break; + default: + WARN_ON(1); + } +} + static int annotate_browser__run(struct annotate_browser *browser, struct perf_evsel *evsel, struct hist_browser_timer *hbt) @@ -623,8 +663,7 @@ static int annotate_browser__run(struct annotate_browser *browser, char title[256]; int key; - annotation__scnprintf_samples_period(notes, title, sizeof(title), evsel); - + hists__scnprintf_title(hists, title, sizeof(title)); if (annotate_browser__show(&browser->b, title, help) < 0) return -1; @@ -695,10 +734,13 @@ static int annotate_browser__run(struct annotate_browser *browser, "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n" "s Toggle source code view\n" "t Circulate percent, total period, samples view\n" + "c Show min/max cycle\n" "/ Search string\n" "k Toggle line numbers\n" "P Print to [symbol_name].annotation file.\n" "r Run available scripts\n" + "p Toggle percent type [local/global]\n" + "b Toggle percent base [period/hits]\n" "? Search string backwards\n"); continue; case 'r': @@ -779,7 +821,7 @@ show_sup_ins: continue; } case 'P': - map_symbol__annotation_dump(ms, evsel); + map_symbol__annotation_dump(ms, evsel, browser->opts); continue; case 't': if (notes->options->show_total_period) { @@ -791,6 +833,19 @@ show_sup_ins: notes->options->show_total_period = true; annotation__update_column_widths(notes); continue; + case 'c': + if (notes->options->show_minmax_cycle) + notes->options->show_minmax_cycle = false; + else + notes->options->show_minmax_cycle = true; + annotation__update_column_widths(notes); + continue; + case 'p': + case 'b': + switch_percent_type(browser->opts, key == 'b'); + hists__scnprintf_title(hists, title, sizeof(title)); + annotate_browser__show(&browser->b, title, help); + continue; case K_LEFT: case K_ESC: case 'q': @@ -809,24 +864,27 @@ out: } int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + struct annotation_options *opts) { - return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); + return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt, opts); } int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + struct annotation_options *opts) { /* reset abort key so that it can get Ctrl-C as a key */ SLang_reset_tty(); SLang_init_tty(0, 0, 0); - return map_symbol__tui_annotate(&he->ms, evsel, hbt); + return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts); } int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, - struct hist_browser_timer *hbt) + struct hist_browser_timer *hbt, + struct annotation_options *opts) { struct annotation *notes = symbol__annotation(sym); struct map_symbol ms = { @@ -843,6 +901,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, .priv = &ms, .use_navkeypressed = true, }, + .opts = opts, }; int ret = -1, err; @@ -852,7 +911,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate2(sym, map, evsel, &annotation__default_options, &browser.arch); + err = symbol__annotate2(sym, map, evsel, opts, &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index e5f247247daa..a96f62ca984a 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1231,6 +1231,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, int width = browser->b.width; char folded_sign = ' '; bool current_entry = ui_browser__is_current_entry(&browser->b, row); + bool use_callchain = hist_entry__has_callchains(entry) && symbol_conf.use_callchain; off_t row_offset = entry->row_offset; bool first = true; struct perf_hpp_fmt *fmt; @@ -1240,7 +1241,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, browser->selection = &entry->ms; } - if (symbol_conf.use_callchain) { + if (use_callchain) { hist_entry__init_have_children(entry); folded_sign = hist_entry__folded(entry); } @@ -1276,7 +1277,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, } if (first) { - if (symbol_conf.use_callchain) { + if (use_callchain) { ui_browser__printf(&browser->b, "%c ", folded_sign); width -= 2; } @@ -1583,7 +1584,7 @@ hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf, int column = 0; int span = 0; - if (symbol_conf.use_callchain) { + if (hists__has_callchains(hists) && symbol_conf.use_callchain) { ret = scnprintf(buf, size, " "); if (advance_hpp_check(&dummy_hpp, ret)) return ret; @@ -1987,7 +1988,7 @@ static int hist_browser__fprintf_entry(struct hist_browser *browser, bool first = true; int ret; - if (symbol_conf.use_callchain) { + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { folded_sign = hist_entry__folded(he); printed += fprintf(fp, "%c ", folded_sign); } @@ -2175,7 +2176,8 @@ struct hist_browser *hist_browser__new(struct hists *hists) static struct hist_browser * perf_evsel_browser__new(struct perf_evsel *evsel, struct hist_browser_timer *hbt, - struct perf_env *env) + struct perf_env *env, + struct annotation_options *annotation_opts) { struct hist_browser *browser = hist_browser__new(evsel__hists(evsel)); @@ -2183,6 +2185,7 @@ perf_evsel_browser__new(struct perf_evsel *evsel, browser->hbt = hbt; browser->env = env; browser->title = hists_browser__scnprintf_title; + browser->annotation_opts = annotation_opts; } return browser; } @@ -2336,7 +2339,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) struct hist_entry *he; int err; - if (!objdump_path && perf_env__lookup_objdump(browser->env)) + if (!browser->annotation_opts->objdump_path && + perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path)) return 0; notes = symbol__annotation(act->ms.sym); @@ -2344,7 +2348,8 @@ do_annotate(struct hist_browser *browser, struct popup_action *act) return 0; evsel = hists_to_evsel(browser->hists); - err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt, + browser->annotation_opts); he = hist_browser__selected_entry(browser); /* * offer option to annotate the other branch source or target @@ -2667,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, he->nr_rows = 0; } - if (!he->leaf || !symbol_conf.use_callchain) + if (!he->leaf || !hist_entry__has_callchains(he) || !symbol_conf.use_callchain) goto next; if (callchain_param.mode == CHAIN_GRAPH_REL) { @@ -2697,10 +2702,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event) + bool warn_lost_event, + struct annotation_options *annotation_opts) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env); + struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts); struct branch_info *bi; #define MAX_OPTIONS 16 char *options[MAX_OPTIONS]; @@ -3062,6 +3068,7 @@ out: struct perf_evsel_menu { struct ui_browser b; struct perf_evsel *selection; + struct annotation_options *annotation_opts; bool lost_events, lost_events_warned; float min_pcnt; struct perf_env *env; @@ -3163,7 +3170,8 @@ browse_hists: true, hbt, menu->min_pcnt, menu->env, - warn_lost_event); + warn_lost_event, + menu->annotation_opts); ui_browser__show_title(&menu->b, title); switch (key) { case K_TAB: @@ -3222,7 +3230,8 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event) + bool warn_lost_event, + struct annotation_options *annotation_opts) { struct perf_evsel *pos; struct perf_evsel_menu menu = { @@ -3237,6 +3246,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, }, .min_pcnt = min_pcnt, .env = env, + .annotation_opts = annotation_opts, }; ui_helpline__push("Press ESC to exit"); @@ -3257,7 +3267,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event) + bool warn_lost_event, + struct annotation_options *annotation_opts) { int nr_entries = evlist->nr_entries; @@ -3267,7 +3278,8 @@ single_entry: return perf_evsel__hists_browse(first, nr_entries, help, false, hbt, min_pcnt, - env, warn_lost_event); + env, warn_lost_event, + annotation_opts); } if (symbol_conf.event_group) { @@ -3285,5 +3297,6 @@ single_entry: return __perf_evlist__tui_browse_hists(evlist, nr_entries, help, hbt, min_pcnt, env, - warn_lost_event); + warn_lost_event, + annotation_opts); } diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 9428bee076f2..91d3e18b50aa 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -4,6 +4,8 @@ #include "ui/browser.h" +struct annotation_options; + struct hist_browser { struct ui_browser b; struct hists *hists; @@ -12,6 +14,7 @@ struct hist_browser { struct hist_browser_timer *hbt; struct pstack *pstack; struct perf_env *env; + struct annotation_options *annotation_opts; int print_seq; bool show_dso; bool show_headers; diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index e03fa75f108a..5b8b8c637686 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c @@ -104,7 +104,7 @@ int map__browse(struct map *map) { struct map_browser mb = { .b = { - .entries = &map->dso->symbols[map->type], + .entries = &map->dso->symbols, .refresh = ui_browser__rb_tree_refresh, .seek = ui_browser__rb_tree_seek, .write = map_browser__write, diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index aeeaf15029f0..48428c9acd89 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, evsel, 0, NULL); + err = symbol__annotate(sym, map, evsel, 0, &annotation__default_options, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 24e1ec201ffd..4ab663ec3e5e 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -382,7 +382,8 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, s, -1); } - if (symbol_conf.use_callchain && hists__has(hists, sym)) { + if (hist_entry__has_callchains(h) && + symbol_conf.use_callchain && hists__has(hists, sym)) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? h->stat_acc->period : h->stat.period; @@ -479,7 +480,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, } } - if (symbol_conf.use_callchain && he->leaf) { + if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? he->stat_acc->period : he->stat.period; diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index 706f6f1e9c7d..fe3dfaa64a91 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -207,7 +207,7 @@ static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, if (ret) return ret; - if (a->thread != b->thread || !symbol_conf.use_callchain) + if (a->thread != b->thread || !hist_entry__has_callchains(a) || !symbol_conf.use_callchain) return 0; ret = b->callchain->max_depth - a->callchain->max_depth; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 6832fcb2e6ff..74c4ae1f0a05 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -516,7 +516,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he, } printed += putc('\n', fp); - if (symbol_conf.use_callchain && he->leaf) { + if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) { u64 total = hists__total_period(hists); printed += hist_entry_callchain__fprintf(he, total, 0, fp); @@ -529,7 +529,7 @@ out: static int hist_entry__fprintf(struct hist_entry *he, size_t size, char *bf, size_t bfsz, FILE *fp, - bool use_callchain) + bool ignore_callchains) { int ret; int callchain_ret = 0; @@ -550,7 +550,7 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size, ret = fprintf(fp, "%s\n", bf); - if (use_callchain) + if (hist_entry__has_callchains(he) && !ignore_callchains) callchain_ret = hist_entry_callchain__fprintf(he, total_period, 0, fp); @@ -755,7 +755,7 @@ int hists__fprintf_headers(struct hists *hists, FILE *fp) size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, - bool use_callchain) + bool ignore_callchains) { struct rb_node *nd; size_t ret = 0; @@ -799,7 +799,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, if (percent < min_pcnt) continue; - ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, use_callchain); + ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, ignore_callchains); if (max_rows && ++nr_rows >= max_rows) break; @@ -819,8 +819,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, } if (h->ms.map == NULL && verbose > 1) { - __map_groups__fprintf_maps(h->thread->mg, - MAP__FUNCTION, fp); + map_groups__fprintf(h->thread->mg, fp); fprintf(fp, "%.10s end\n", graph_dotted_line); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8052373bcd6a..b7bf201fe8a8 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -10,6 +10,7 @@ libperf-y += evlist.o libperf-y += evsel.o libperf-y += evsel_fprintf.o libperf-y += find_bit.o +libperf-y += get_current_dir_name.o libperf-y += kallsyms.o libperf-y += levenshtein.o libperf-y += llvm-utils.o @@ -24,7 +25,6 @@ libperf-y += libstring.o libperf-y += bitmap.o libperf-y += hweight.o libperf-y += smt.o -libperf-y += quote.o libperf-y += strbuf.o libperf-y += string.o libperf-y += strlist.o @@ -74,6 +74,7 @@ libperf-y += vdso.o libperf-y += counts.o libperf-y += stat.o libperf-y += stat-shadow.o +libperf-y += stat-display.o libperf-y += record.o libperf-y += srcline.o libperf-y += data.o @@ -88,6 +89,7 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-$(CONFIG_AUXTRACE) += arm-spe.o libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o +libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o ifdef CONFIG_LIBOPENCSD libperf-$(CONFIG_AUXTRACE) += cs-etm.o @@ -152,6 +154,8 @@ libperf-y += perf-hooks.o libperf-$(CONFIG_CXX) += c++/ CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" + # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5d74a30fe00f..6936daf89ddd 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -21,6 +21,7 @@ #include "debug.h" #include "annotate.h" #include "evsel.h" +#include "evlist.h" #include "block-range.h" #include "string2.h" #include "arch/common.h" @@ -46,11 +47,11 @@ struct annotation_options annotation__default_options = { .use_offset = true, .jump_arrows = true, + .annotate_src = true, .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS, + .percent_type = PERCENT_PERIOD_LOCAL, }; -const char *disassembler_style; -const char *objdump_path; static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); @@ -138,6 +139,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i #include "arch/x86/annotate/instructions.c" #include "arch/powerpc/annotate/instructions.c" #include "arch/s390/annotate/instructions.c" +#include "arch/sparc/annotate/instructions.c" static struct arch architectures[] = { { @@ -169,6 +171,13 @@ static struct arch architectures[] = { .comment_char = '#', }, }, + { + .name = "sparc", + .init = sparc__annotate_init, + .objdump = { + .comment_char = '#', + }, + }, }; static void ins__delete(struct ins_operands *ops) @@ -245,8 +254,14 @@ find_target: indirect_call: tok = strchr(endptr, '*'); - if (tok != NULL) - ops->target.addr = strtoull(tok + 1, NULL, 16); + if (tok != NULL) { + endptr++; + + /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx). + * Do not parse such instruction. */ + if (strstr(endptr, "(%r") == NULL) + ops->target.addr = strtoull(endptr, NULL, 16); + } goto find_target; } @@ -275,7 +290,19 @@ bool ins__is_call(const struct ins *ins) return ins->ops == &call_ops || ins->ops == &s390_call_ops; } -static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms) +/* + * Prevents from matching commas in the comment section, e.g.: + * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast + */ +static inline const char *validate_comma(const char *c, struct ins_operands *ops) +{ + if (ops->raw_comment && c > ops->raw_comment) + return NULL; + + return c; +} + +static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms) { struct map *map = ms->map; struct symbol *sym = ms->sym; @@ -284,6 +311,10 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op }; const char *c = strchr(ops->raw, ','); u64 start, end; + + ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + c = validate_comma(c, ops); + /* * Examples of lines to parse for the _cpp_lex_token@@Base * function: @@ -303,6 +334,7 @@ static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands *op ops->target.addr = strtoull(c, NULL, 16); if (!ops->target.addr) { c = strchr(c, ','); + c = validate_comma(c, ops); if (c++ != NULL) ops->target.addr = strtoull(c, NULL, 16); } @@ -360,9 +392,12 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name); c = strchr(ops->raw, ','); + c = validate_comma(c, ops); + if (c != NULL) { const char *c2 = strchr(c + 1, ','); + c2 = validate_comma(c2, ops); /* check for 3-op insn */ if (c2 != NULL) c = c2; @@ -678,10 +713,28 @@ static struct arch *arch__find(const char *name) return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp); } -int symbol__alloc_hist(struct symbol *sym) +static struct annotated_source *annotated_source__new(void) +{ + struct annotated_source *src = zalloc(sizeof(*src)); + + if (src != NULL) + INIT_LIST_HEAD(&src->source); + + return src; +} + +static __maybe_unused void annotated_source__delete(struct annotated_source *src) +{ + if (src == NULL) + return; + zfree(&src->histograms); + zfree(&src->cycles_hist); + free(src); +} + +static int annotated_source__alloc_histograms(struct annotated_source *src, + size_t size, int nr_hists) { - struct annotation *notes = symbol__annotation(sym); - size_t size = symbol__size(sym); size_t sizeof_sym_hist; /* @@ -701,17 +754,13 @@ int symbol__alloc_hist(struct symbol *sym) sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry)); /* Check for overflow in zalloc argument */ - if (sizeof_sym_hist > (SIZE_MAX - sizeof(*notes->src)) - / symbol_conf.nr_events) + if (sizeof_sym_hist > SIZE_MAX / nr_hists) return -1; - notes->src = zalloc(sizeof(*notes->src) + symbol_conf.nr_events * sizeof_sym_hist); - if (notes->src == NULL) - return -1; - notes->src->sizeof_sym_hist = sizeof_sym_hist; - notes->src->nr_histograms = symbol_conf.nr_events; - INIT_LIST_HEAD(¬es->src->source); - return 0; + src->sizeof_sym_hist = sizeof_sym_hist; + src->nr_histograms = nr_hists; + src->histograms = calloc(nr_hists, sizeof_sym_hist) ; + return src->histograms ? 0 : -1; } /* The cycles histogram is lazily allocated. */ @@ -741,14 +790,11 @@ void symbol__annotate_zero_histograms(struct symbol *sym) pthread_mutex_unlock(¬es->lock); } -static int __symbol__account_cycles(struct annotation *notes, +static int __symbol__account_cycles(struct cyc_hist *ch, u64 start, unsigned offset, unsigned cycles, unsigned have_start) { - struct cyc_hist *ch; - - ch = notes->src->cycles_hist; /* * For now we can only account one basic block per * final jump. But multiple could be overlapping. @@ -760,6 +806,15 @@ static int __symbol__account_cycles(struct annotation *notes, ch[offset].num_aggr++; ch[offset].cycles_aggr += cycles; + if (cycles > ch[offset].cycles_max) + ch[offset].cycles_max = cycles; + + if (ch[offset].cycles_min) { + if (cycles && cycles < ch[offset].cycles_min) + ch[offset].cycles_min = cycles; + } else + ch[offset].cycles_min = cycles; + if (!have_start && ch[offset].have_start) return 0; if (ch[offset].num) { @@ -782,7 +837,7 @@ static int __symbol__account_cycles(struct annotation *notes, } static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, - struct annotation *notes, int evidx, u64 addr, + struct annotated_source *src, int evidx, u64 addr, struct perf_sample *sample) { unsigned offset; @@ -798,7 +853,12 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, } offset = addr - sym->start; - h = annotation__histogram(notes, evidx); + h = annotated_source__histogram(src, evidx); + if (h == NULL) { + pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n", + __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC); + return -ENOMEM; + } h->nr_samples++; h->addr[offset].nr_samples++; h->period += sample->period; @@ -811,45 +871,69 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } -static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) +static struct cyc_hist *symbol__cycles_hist(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); if (notes->src == NULL) { - if (symbol__alloc_hist(sym) < 0) + notes->src = annotated_source__new(); + if (notes->src == NULL) return NULL; + goto alloc_cycles_hist; + } + + if (!notes->src->cycles_hist) { +alloc_cycles_hist: + symbol__alloc_hist_cycles(sym); } - if (!notes->src->cycles_hist && cycles) { - if (symbol__alloc_hist_cycles(sym) < 0) + + return notes->src->cycles_hist; +} + +struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists) +{ + struct annotation *notes = symbol__annotation(sym); + + if (notes->src == NULL) { + notes->src = annotated_source__new(); + if (notes->src == NULL) return NULL; + goto alloc_histograms; } - return notes; + + if (notes->src->histograms == NULL) { +alloc_histograms: + annotated_source__alloc_histograms(notes->src, symbol__size(sym), + nr_hists); + } + + return notes->src; } static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, - int evidx, u64 addr, + struct perf_evsel *evsel, u64 addr, struct perf_sample *sample) { - struct annotation *notes; + struct annotated_source *src; if (sym == NULL) return 0; - notes = symbol__get_annotation(sym, false); - if (notes == NULL) + src = symbol__hists(sym, evsel->evlist->nr_entries); + if (src == NULL) return -ENOMEM; - return __symbol__inc_addr_samples(sym, map, notes, evidx, addr, sample); + return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample); } static int symbol__account_cycles(u64 addr, u64 start, struct symbol *sym, unsigned cycles) { - struct annotation *notes; + struct cyc_hist *cycles_hist; unsigned offset; if (sym == NULL) return 0; - notes = symbol__get_annotation(sym, true); - if (notes == NULL) + cycles_hist = symbol__cycles_hist(sym); + if (cycles_hist == NULL) return -ENOMEM; if (addr < sym->start || addr >= sym->end) return -ERANGE; @@ -861,7 +945,7 @@ static int symbol__account_cycles(u64 addr, u64 start, start = 0; } offset = addr - sym->start; - return __symbol__account_cycles(notes, + return __symbol__account_cycles(cycles_hist, start ? start - sym->start : 0, offset, cycles, !!start); @@ -953,8 +1037,11 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) if (ch->have_start) annotation__count_and_fill(notes, ch->start, offset, ch); al = notes->offsets[offset]; - if (al && ch->num_aggr) + if (al && ch->num_aggr) { al->cycles = ch->cycles_aggr / ch->num_aggr; + al->cycles_max = ch->cycles_max; + al->cycles_min = ch->cycles_min; + } notes->have_cycles = true; } } @@ -962,15 +1049,15 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, - int evidx) + struct perf_evsel *evsel) { - return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr, sample); + return symbol__inc_addr_samples(ams->sym, ams->map, evsel, ams->al_addr, sample); } int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, - int evidx, u64 ip) + struct perf_evsel *evsel, u64 ip) { - return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evidx, ip, sample); + return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evsel, ip, sample); } static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms) @@ -1019,6 +1106,7 @@ struct annotate_args { struct arch *arch; struct map_symbol ms; struct perf_evsel *evsel; + struct annotation_options *options; s64 offset; char *line; int line_nr; @@ -1055,7 +1143,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize) if (perf_evsel__is_group_event(evsel)) nr = evsel->nr_members; - size += sizeof(al->samples[0]) * nr; + size += sizeof(al->data[0]) * nr; al = zalloc(size); if (al) { @@ -1064,7 +1152,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize) al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; - al->samples_nr = nr; + al->data_nr = nr; } return al; @@ -1244,7 +1332,8 @@ static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_wi static int annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue, int addr_fmt_width) + int max_lines, struct annotation_line *queue, int addr_fmt_width, + int percent_type) { struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; @@ -1256,15 +1345,18 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start const char *color; struct annotation *notes = symbol__annotation(sym); - for (i = 0; i < al->samples_nr; i++) { - struct annotation_data *sample = &al->samples[i]; + for (i = 0; i < al->data_nr; i++) { + double percent; - if (sample->percent > max_percent) - max_percent = sample->percent; + percent = annotation_data__percent(&al->data[i], + percent_type); + + if (percent > max_percent) + max_percent = percent; } - if (al->samples_nr > nr_percent) - nr_percent = al->samples_nr; + if (al->data_nr > nr_percent) + nr_percent = al->data_nr; if (max_percent < min_pcnt) return -1; @@ -1277,7 +1369,8 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (queue == al) break; annotation_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL, addr_fmt_width); + 0, 0, 1, NULL, addr_fmt_width, + percent_type); } } @@ -1298,18 +1391,20 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start } for (i = 0; i < nr_percent; i++) { - struct annotation_data *sample = &al->samples[i]; + struct annotation_data *data = &al->data[i]; + double percent; - color = get_percent_color(sample->percent); + percent = annotation_data__percent(data, percent_type); + color = get_percent_color(percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, - sample->he.period); + data->he.period); else if (symbol_conf.show_nr_samples) color_fprintf(stdout, color, " %7" PRIu64, - sample->he.nr_samples); + data->he.nr_samples); else - color_fprintf(stdout, color, " %7.2f", sample->percent); + color_fprintf(stdout, color, " %7.2f", percent); } printf(" : "); @@ -1560,6 +1655,7 @@ fallback: static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct annotation_options *opts = args->options; struct map *map = args->ms.map; struct dso *dso = map->dso; char *command; @@ -1567,6 +1663,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) char symfs_filename[PATH_MAX]; struct kcore_extract kce; bool delete_extract = false; + bool decomp = false; int stdout_fd[2]; int lineno = 0; int nline; @@ -1600,6 +1697,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) tmp, sizeof(tmp)) < 0) goto out; + decomp = true; strcpy(symfs_filename, tmp); } @@ -1607,13 +1705,13 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) "%s %s%s --start-address=0x%016" PRIx64 " --stop-address=0x%016" PRIx64 " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand", - objdump_path ? objdump_path : "objdump", - disassembler_style ? "-M " : "", - disassembler_style ? disassembler_style : "", + opts->objdump_path ?: "objdump", + opts->disassembler_style ? "-M " : "", + opts->disassembler_style ?: "", map__rip_2objdump(map, sym->start), map__rip_2objdump(map, sym->end), - symbol_conf.annotate_asm_raw ? "" : "--no-show-raw", - symbol_conf.annotate_src ? "-S" : "", + opts->show_asm_raw ? "" : "--no-show-raw", + opts->annotate_src ? "-S" : "", symfs_filename, symfs_filename); if (err < 0) { @@ -1686,7 +1784,7 @@ out_free_command: out_remove_tmp: close(stdout_fd[0]); - if (dso__needs_decompress(dso)) + if (decomp) unlink(symfs_filename); if (delete_extract) @@ -1699,34 +1797,45 @@ out_close_stdout: goto out_free_command; } -static void calc_percent(struct sym_hist *hist, - struct annotation_data *sample, +static void calc_percent(struct sym_hist *sym_hist, + struct hists *hists, + struct annotation_data *data, s64 offset, s64 end) { unsigned int hits = 0; u64 period = 0; while (offset < end) { - hits += hist->addr[offset].nr_samples; - period += hist->addr[offset].period; + hits += sym_hist->addr[offset].nr_samples; + period += sym_hist->addr[offset].period; ++offset; } - if (hist->nr_samples) { - sample->he.period = period; - sample->he.nr_samples = hits; - sample->percent = 100.0 * hits / hist->nr_samples; + if (sym_hist->nr_samples) { + data->he.period = period; + data->he.nr_samples = hits; + data->percent[PERCENT_HITS_LOCAL] = 100.0 * hits / sym_hist->nr_samples; } + + if (hists->stats.nr_non_filtered_samples) + data->percent[PERCENT_HITS_GLOBAL] = 100.0 * hits / hists->stats.nr_non_filtered_samples; + + if (sym_hist->period) + data->percent[PERCENT_PERIOD_LOCAL] = 100.0 * period / sym_hist->period; + + if (hists->stats.total_period) + data->percent[PERCENT_PERIOD_GLOBAL] = 100.0 * period / hists->stats.total_period; } static void annotation__calc_percent(struct annotation *notes, - struct perf_evsel *evsel, s64 len) + struct perf_evsel *leader, s64 len) { struct annotation_line *al, *next; + struct perf_evsel *evsel; list_for_each_entry(al, ¬es->src->source, node) { s64 end; - int i; + int i = 0; if (al->offset == -1) continue; @@ -1734,14 +1843,17 @@ static void annotation__calc_percent(struct annotation *notes, next = annotation_line__next(al, ¬es->src->source); end = next ? next->offset : len; - for (i = 0; i < al->samples_nr; i++) { - struct annotation_data *sample; - struct sym_hist *hist; + for_each_group_evsel(evsel, leader) { + struct hists *hists = evsel__hists(evsel); + struct annotation_data *data; + struct sym_hist *sym_hist; - hist = annotation__histogram(notes, evsel->idx + i); - sample = &al->samples[i]; + BUG_ON(i >= al->data_nr); - calc_percent(hist, sample, al->offset, end); + sym_hist = annotation__histogram(notes, evsel->idx); + data = &al->data[i++]; + + calc_percent(sym_hist, hists, data, al->offset, end); } } } @@ -1755,11 +1867,13 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, + struct annotation_options *options, struct arch **parch) { struct annotate_args args = { .privsize = privsize, .evsel = evsel, + .options = options, }; struct perf_env *env = perf_evsel__env(evsel); const char *arch_name = perf_env__arch(env); @@ -1790,7 +1904,8 @@ int symbol__annotate(struct symbol *sym, struct map *map, return symbol__disassemble(sym, &args); } -static void insert_source_line(struct rb_root *root, struct annotation_line *al) +static void insert_source_line(struct rb_root *root, struct annotation_line *al, + struct annotation_options *opts) { struct annotation_line *iter; struct rb_node **p = &root->rb_node; @@ -1803,8 +1918,10 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al) ret = strcmp(iter->path, al->path); if (ret == 0) { - for (i = 0; i < al->samples_nr; i++) - iter->samples[i].percent_sum += al->samples[i].percent; + for (i = 0; i < al->data_nr; i++) { + iter->data[i].percent_sum += annotation_data__percent(&al->data[i], + opts->percent_type); + } return; } @@ -1814,8 +1931,10 @@ static void insert_source_line(struct rb_root *root, struct annotation_line *al) p = &(*p)->rb_right; } - for (i = 0; i < al->samples_nr; i++) - al->samples[i].percent_sum = al->samples[i].percent; + for (i = 0; i < al->data_nr; i++) { + al->data[i].percent_sum = annotation_data__percent(&al->data[i], + opts->percent_type); + } rb_link_node(&al->rb_node, parent, p); rb_insert_color(&al->rb_node, root); @@ -1825,10 +1944,10 @@ static int cmp_source_line(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->samples_nr; i++) { - if (a->samples[i].percent_sum == b->samples[i].percent_sum) + for (i = 0; i < a->data_nr; i++) { + if (a->data[i].percent_sum == b->data[i].percent_sum) continue; - return a->samples[i].percent_sum > b->samples[i].percent_sum; + return a->data[i].percent_sum > b->data[i].percent_sum; } return 0; @@ -1893,8 +2012,8 @@ static void print_summary(struct rb_root *root, const char *filename) int i; al = rb_entry(node, struct annotation_line, rb_node); - for (i = 0; i < al->samples_nr; i++) { - percent = al->samples[i].percent_sum; + for (i = 0; i < al->data_nr; i++) { + percent = al->data[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); @@ -1937,8 +2056,8 @@ static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) } int symbol__annotate_printf(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool full_paths, - int min_pcnt, int max_lines, int context) + struct perf_evsel *evsel, + struct annotation_options *opts) { struct dso *dso = map->dso; char *filename; @@ -1950,28 +2069,35 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; + bool context = opts->context; u64 len; int width = symbol_conf.show_total_period ? 12 : 8; int graph_dotted_len; + char buf[512]; filename = strdup(dso->long_name); if (!filename) return -ENOMEM; - if (full_paths) + if (opts->full_path) d_filename = filename; else d_filename = basename(filename); len = symbol__size(sym); - if (perf_evsel__is_group_event(evsel)) + if (perf_evsel__is_group_event(evsel)) { width *= evsel->nr_members; + perf_evsel__group_desc(evsel, buf, sizeof(buf)); + evsel_name = buf; + } - graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples)\n", + graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples, " + "percent: %s)\n", width, width, symbol_conf.show_total_period ? "Period" : symbol_conf.show_nr_samples ? "Samples" : "Percent", - d_filename, evsel_name, h->nr_samples); + d_filename, evsel_name, h->nr_samples, + percent_type_str(opts->percent_type)); printf("%-*.*s----\n", graph_dotted_len, graph_dotted_len, graph_dotted_line); @@ -1990,8 +2116,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, } err = annotation_line__print(pos, sym, start, evsel, len, - min_pcnt, printed, max_lines, - queue, addr_fmt_width); + opts->min_pcnt, printed, opts->max_lines, + queue, addr_fmt_width, opts->percent_type); switch (err) { case 0: @@ -2068,10 +2194,11 @@ static void FILE__write_graph(void *fp, int graph) fputs(s, fp); } -int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) +static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp, + struct annotation_options *opts) { struct annotation *notes = symbol__annotation(sym); - struct annotation_write_ops ops = { + struct annotation_write_ops wops = { .first_line = true, .obj = fp, .set_color = FILE__set_color, @@ -2085,15 +2212,16 @@ int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp) list_for_each_entry(al, ¬es->src->source, node) { if (annotation_line__filter(al, notes)) continue; - annotation_line__write(al, notes, &ops); + annotation_line__write(al, notes, &wops, opts); fputc('\n', fp); - ops.first_line = false; + wops.first_line = false; } return 0; } -int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel) +int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel, + struct annotation_options *opts) { const char *ev_name = perf_evsel__name(evsel); char buf[1024]; @@ -2115,7 +2243,7 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel) fprintf(fp, "%s() %s\nEvent: %s\n\n", ms->sym->name, ms->map->dso->long_name, ev_name); - symbol__annotate_fprintf2(ms->sym, fp); + symbol__annotate_fprintf2(ms->sym, fp, opts); fclose(fp); err = 0; @@ -2285,7 +2413,8 @@ void annotation__update_column_widths(struct annotation *notes) } static void annotation__calc_lines(struct annotation *notes, struct map *map, - struct rb_root *root) + struct rb_root *root, + struct annotation_options *opts) { struct annotation_line *al; struct rb_root tmp_root = RB_ROOT; @@ -2294,13 +2423,14 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, double percent_max = 0.0; int i; - for (i = 0; i < al->samples_nr; i++) { - struct annotation_data *sample; + for (i = 0; i < al->data_nr; i++) { + double percent; - sample = &al->samples[i]; + percent = annotation_data__percent(&al->data[i], + opts->percent_type); - if (sample->percent > percent_max) - percent_max = sample->percent; + if (percent > percent_max) + percent_max = percent; } if (percent_max <= 0.5) @@ -2308,42 +2438,43 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, al->path = get_srcline(map->dso, notes->start + al->offset, NULL, false, true, notes->start + al->offset); - insert_source_line(&tmp_root, al); + insert_source_line(&tmp_root, al, opts); } resort_source_line(root, &tmp_root); } static void symbol__calc_lines(struct symbol *sym, struct map *map, - struct rb_root *root) + struct rb_root *root, + struct annotation_options *opts) { struct annotation *notes = symbol__annotation(sym); - annotation__calc_lines(notes, map, root); + annotation__calc_lines(notes, map, root, opts); } int symbol__tty_annotate2(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths) + struct perf_evsel *evsel, + struct annotation_options *opts) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - struct annotation_options opts = annotation__default_options; - struct annotation *notes = symbol__annotation(sym); + struct hists *hists = evsel__hists(evsel); char buf[1024]; - if (symbol__annotate2(sym, map, evsel, &opts, NULL) < 0) + if (symbol__annotate2(sym, map, evsel, opts, NULL) < 0) return -1; - if (print_lines) { - srcline_full_filename = full_paths; - symbol__calc_lines(sym, map, &source_line); + if (opts->print_lines) { + srcline_full_filename = opts->full_path; + symbol__calc_lines(sym, map, &source_line, opts); print_summary(&source_line, dso->long_name); } - annotation__scnprintf_samples_period(notes, buf, sizeof(buf), evsel); - fprintf(stdout, "%s\n%s() %s\n", buf, sym->name, dso->long_name); - symbol__annotate_fprintf2(sym, stdout); + hists__scnprintf_title(hists, buf, sizeof(buf)); + fprintf(stdout, "%s, [percent: %s]\n%s() %s\n", + buf, percent_type_str(opts->percent_type), sym->name, dso->long_name); + symbol__annotate_fprintf2(sym, stdout, opts); annotated_source__purge(symbol__annotation(sym)->src); @@ -2351,25 +2482,24 @@ int symbol__tty_annotate2(struct symbol *sym, struct map *map, } int symbol__tty_annotate(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths, int min_pcnt, int max_lines) + struct perf_evsel *evsel, + struct annotation_options *opts) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, opts, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); - if (print_lines) { - srcline_full_filename = full_paths; - symbol__calc_lines(sym, map, &source_line); + if (opts->print_lines) { + srcline_full_filename = opts->full_path; + symbol__calc_lines(sym, map, &source_line, opts); print_summary(&source_line, dso->long_name); } - symbol__annotate_printf(sym, map, evsel, full_paths, - min_pcnt, max_lines, 0); + symbol__annotate_printf(sym, map, evsel, opts); annotated_source__purge(symbol__annotation(sym)->src); @@ -2382,14 +2512,21 @@ bool ui__has_annotation(void) } -double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes) +static double annotation_line__max_percent(struct annotation_line *al, + struct annotation *notes, + unsigned int percent_type) { double percent_max = 0.0; int i; for (i = 0; i < notes->nr_events; i++) { - if (al->samples[i].percent > percent_max) - percent_max = al->samples[i].percent; + double percent; + + percent = annotation_data__percent(&al->data[i], + percent_type); + + if (percent > percent_max) + percent_max = percent; } return percent_max; @@ -2428,7 +2565,7 @@ call_like: static void __annotation_line__write(struct annotation_line *al, struct annotation *notes, bool first_line, bool current_entry, bool change_color, int width, - void *obj, + void *obj, unsigned int percent_type, int (*obj__set_color)(void *obj, int color), void (*obj__set_percent_color)(void *obj, double percent, bool current), int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current), @@ -2436,7 +2573,7 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati void (*obj__write_graph)(void *obj, int graph)) { - double percent_max = annotation_line__max_percent(al, notes); + double percent_max = annotation_line__max_percent(al, notes, percent_type); int pcnt_width = annotation__pcnt_width(notes), cycles_width = annotation__cycles_width(notes); bool show_title = false; @@ -2455,15 +2592,18 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati int i; for (i = 0; i < notes->nr_events; i++) { - obj__set_percent_color(obj, al->samples[i].percent, current_entry); + double percent; + + percent = annotation_data__percent(&al->data[i], percent_type); + + obj__set_percent_color(obj, percent, current_entry); if (notes->options->show_total_period) { - obj__printf(obj, "%11" PRIu64 " ", al->samples[i].he.period); + obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period); } else if (notes->options->show_nr_samples) { obj__printf(obj, "%6" PRIu64 " ", - al->samples[i].he.nr_samples); + al->data[i].he.nr_samples); } else { - obj__printf(obj, "%6.2f ", - al->samples[i].percent); + obj__printf(obj, "%6.2f ", percent); } } } else { @@ -2486,13 +2626,38 @@ static void __annotation_line__write(struct annotation_line *al, struct annotati else obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC"); - if (al->cycles) - obj__printf(obj, "%*" PRIu64 " ", + if (!notes->options->show_minmax_cycle) { + if (al->cycles) + obj__printf(obj, "%*" PRIu64 " ", ANNOTATION__CYCLES_WIDTH - 1, al->cycles); - else if (!show_title) - obj__printf(obj, "%*s", ANNOTATION__CYCLES_WIDTH, " "); - else - obj__printf(obj, "%*s ", ANNOTATION__CYCLES_WIDTH - 1, "Cycle"); + else if (!show_title) + obj__printf(obj, "%*s", + ANNOTATION__CYCLES_WIDTH, " "); + else + obj__printf(obj, "%*s ", + ANNOTATION__CYCLES_WIDTH - 1, + "Cycle"); + } else { + if (al->cycles) { + char str[32]; + + scnprintf(str, sizeof(str), + "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")", + al->cycles, al->cycles_min, + al->cycles_max); + + obj__printf(obj, "%*s ", + ANNOTATION__MINMAX_CYCLES_WIDTH - 1, + str); + } else if (!show_title) + obj__printf(obj, "%*s", + ANNOTATION__MINMAX_CYCLES_WIDTH, + " "); + else + obj__printf(obj, "%*s ", + ANNOTATION__MINMAX_CYCLES_WIDTH - 1, + "Cycle(min/max)"); + } } obj__printf(obj, " "); @@ -2556,13 +2721,15 @@ print_addr: } void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops) + struct annotation_write_ops *wops, + struct annotation_options *opts) { - __annotation_line__write(al, notes, ops->first_line, ops->current_entry, - ops->change_color, ops->width, ops->obj, - ops->set_color, ops->set_percent_color, - ops->set_jumps_percent_color, ops->printf, - ops->write_graph); + __annotation_line__write(al, notes, wops->first_line, wops->current_entry, + wops->change_color, wops->width, wops->obj, + opts->percent_type, + wops->set_color, wops->set_percent_color, + wops->set_jumps_percent_color, wops->printf, + wops->write_graph); } int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel, @@ -2579,7 +2746,7 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - err = symbol__annotate(sym, map, evsel, 0, parch); + err = symbol__annotate(sym, map, evsel, 0, options, parch); if (err) goto out_free_offsets; @@ -2604,46 +2771,6 @@ out_free_offsets: return -1; } -int __annotation__scnprintf_samples_period(struct annotation *notes, - char *bf, size_t size, - struct perf_evsel *evsel, - bool show_freq) -{ - const char *ev_name = perf_evsel__name(evsel); - char buf[1024], ref[30] = " show reference callgraph, "; - char sample_freq_str[64] = ""; - unsigned long nr_samples = 0; - int nr_members = 1; - bool enable_ref = false; - u64 nr_events = 0; - char unit; - int i; - - if (perf_evsel__is_group_event(evsel)) { - perf_evsel__group_desc(evsel, buf, sizeof(buf)); - ev_name = buf; - nr_members = evsel->nr_members; - } - - for (i = 0; i < nr_members; i++) { - struct sym_hist *ah = annotation__histogram(notes, evsel->idx + i); - - nr_samples += ah->nr_samples; - nr_events += ah->period; - } - - if (symbol_conf.show_ref_callgraph && strstr(ev_name, "call-graph=no")) - enable_ref = true; - - if (show_freq) - scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq); - - nr_samples = convert_unit(nr_samples, &unit); - return scnprintf(bf, size, "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64, - nr_samples, unit, evsel->nr_members > 1 ? "s" : "", - ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events); -} - #define ANNOTATION__CFG(n) \ { .name = #n, .value = &annotation__default_options.n, } @@ -2708,3 +2835,55 @@ void annotation_config__init(void) annotation__default_options.show_total_period = symbol_conf.show_total_period; annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples; } + +static unsigned int parse_percent_type(char *str1, char *str2) +{ + unsigned int type = (unsigned int) -1; + + if (!strcmp("period", str1)) { + if (!strcmp("local", str2)) + type = PERCENT_PERIOD_LOCAL; + else if (!strcmp("global", str2)) + type = PERCENT_PERIOD_GLOBAL; + } + + if (!strcmp("hits", str1)) { + if (!strcmp("local", str2)) + type = PERCENT_HITS_LOCAL; + else if (!strcmp("global", str2)) + type = PERCENT_HITS_GLOBAL; + } + + return type; +} + +int annotate_parse_percent_type(const struct option *opt, const char *_str, + int unset __maybe_unused) +{ + struct annotation_options *opts = opt->value; + unsigned int type; + char *str1, *str2; + int err = -1; + + str1 = strdup(_str); + if (!str1) + return -ENOMEM; + + str2 = strchr(str1, '-'); + if (!str2) + goto out; + + *str2++ = 0; + + type = parse_percent_type(str1, str2); + if (type == (unsigned int) -1) + type = parse_percent_type(str2, str1); + if (type != (unsigned int) -1) { + opts->percent_type = type; + err = 0; + } + +out: + free(str1); + return err; +} diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f28a9e43421d..5399ba2321bb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -11,6 +11,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <pthread.h> +#include <asm/bug.h> struct ins_ops; @@ -21,6 +22,7 @@ struct ins { struct ins_operands { char *raw; + char *raw_comment; struct { char *raw; char *name; @@ -61,16 +63,28 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); #define ANNOTATION__IPC_WIDTH 6 #define ANNOTATION__CYCLES_WIDTH 6 +#define ANNOTATION__MINMAX_CYCLES_WIDTH 19 struct annotation_options { bool hide_src_code, use_offset, jump_arrows, + print_lines, + full_path, show_linenr, show_nr_jumps, show_nr_samples, - show_total_period; + show_total_period, + show_minmax_cycle, + show_asm_raw, + annotate_src; u8 offset_level; + int min_pcnt; + int max_lines; + int context; + const char *objdump_path; + const char *disassembler_style; + unsigned int percent_type; }; enum { @@ -90,8 +104,16 @@ struct sym_hist_entry { u64 period; }; +enum { + PERCENT_HITS_LOCAL, + PERCENT_HITS_GLOBAL, + PERCENT_PERIOD_LOCAL, + PERCENT_PERIOD_GLOBAL, + PERCENT_MAX, +}; + struct annotation_data { - double percent; + double percent[PERCENT_MAX]; double percent_sum; struct sym_hist_entry he; }; @@ -105,12 +127,14 @@ struct annotation_line { int jump_sources; float ipc; u64 cycles; + u64 cycles_max; + u64 cycles_min; size_t privsize; char *path; u32 idx; int idx_asm; - int samples_nr; - struct annotation_data samples[0]; + int data_nr; + struct annotation_data data[0]; }; struct disasm_line { @@ -121,6 +145,27 @@ struct disasm_line { struct annotation_line al; }; +static inline double annotation_data__percent(struct annotation_data *data, + unsigned int which) +{ + return which < PERCENT_MAX ? data->percent[which] : -1; +} + +static inline const char *percent_type_str(unsigned int type) +{ + static const char *str[PERCENT_MAX] = { + "local hits", + "global hits", + "local period", + "global period", + }; + + if (WARN_ON(type >= PERCENT_MAX)) + return "N/A"; + + return str[type]; +} + static inline struct disasm_line *disasm_line(struct annotation_line *al) { return al ? container_of(al, struct disasm_line, al) : NULL; @@ -156,22 +201,15 @@ struct annotation_write_ops { void (*write_graph)(void *obj, int graph); }; -double annotation_line__max_percent(struct annotation_line *al, struct annotation *notes); void annotation_line__write(struct annotation_line *al, struct annotation *notes, - struct annotation_write_ops *ops); + struct annotation_write_ops *ops, + struct annotation_options *opts); int __annotation__scnprintf_samples_period(struct annotation *notes, char *bf, size_t size, struct perf_evsel *evsel, bool show_freq); -static inline int annotation__scnprintf_samples_period(struct annotation *notes, - char *bf, size_t size, - struct perf_evsel *evsel) -{ - return __annotation__scnprintf_samples_period(notes, bf, size, evsel, true); -} - int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); @@ -186,6 +224,8 @@ struct cyc_hist { u64 start; u64 cycles; u64 cycles_aggr; + u64 cycles_max; + u64 cycles_min; u32 num; u32 num_aggr; u8 have_start; @@ -195,7 +235,11 @@ struct cyc_hist { /** struct annotated_source - symbols with hits have this attached as in sannotation * - * @histogram: Array of addr hit histograms per event being monitored + * @histograms: Array of addr hit histograms per event being monitored + * nr_histograms: This may not be the same as evsel->evlist->nr_entries if + * we have more than a group in a evlist, where we will want + * to see each group separately, that is why symbol__annotate2() + * sets src->nr_histograms to evsel->nr_members. * @lines: If 'print_lines' is specified, per source code line percentages * @source: source parsed from a disassembler like objdump -dS * @cyc_hist: Average cycles per basic block @@ -211,7 +255,7 @@ struct annotated_source { int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; - struct sym_hist histograms[0]; + struct sym_hist *histograms; }; struct annotation { @@ -239,6 +283,9 @@ struct annotation { static inline int annotation__cycles_width(struct annotation *notes) { + if (notes->have_cycles && notes->options->show_minmax_cycle) + return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH; + return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0; } @@ -258,10 +305,14 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym) void annotation__update_column_widths(struct annotation *notes); void annotation__init_column_widths(struct annotation *notes, struct symbol *sym); +static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx) +{ + return ((void *)src->histograms) + (src->sizeof_sym_hist * idx); +} + static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx) { - return (((void *)¬es->src->histograms) + - (notes->src->sizeof_sym_hist * idx)); + return annotated_source__histogram(notes->src, idx); } static inline struct annotation *symbol__annotation(struct symbol *sym) @@ -270,20 +321,21 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, - int evidx); + struct perf_evsel *evsel); int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, struct addr_map_symbol *start, unsigned cycles); int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample, - int evidx, u64 addr); + struct perf_evsel *evsel, u64 addr); -int symbol__alloc_hist(struct symbol *sym); +struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, + struct annotation_options *options, struct arch **parch); int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel, @@ -311,42 +363,41 @@ int symbol__strerror_disassemble(struct symbol *sym, struct map *map, int errnum, char *buf, size_t buflen); int symbol__annotate_printf(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool full_paths, - int min_pcnt, int max_lines, int context); -int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp); + struct perf_evsel *evsel, + struct annotation_options *options); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); void annotated_source__purge(struct annotated_source *as); -int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel); +int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel, + struct annotation_options *opts); bool ui__has_annotation(void); int symbol__tty_annotate(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths, int min_pcnt, int max_lines); + struct perf_evsel *evsel, struct annotation_options *opts); int symbol__tty_annotate2(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, bool print_lines, - bool full_paths); + struct perf_evsel *evsel, struct annotation_options *opts); #ifdef HAVE_SLANG_SUPPORT int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, + struct annotation_options *opts); #else static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused, struct map *map __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt - __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + struct annotation_options *opts __maybe_unused) { return 0; } #endif -extern const char *disassembler_style; - void annotation_config__init(void); +int annotate_parse_percent_type(const struct option *opt, const char *_str, + int unset); #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 857de69a5361..72d5ba2479bf 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -56,6 +56,7 @@ #include "intel-pt.h" #include "intel-bts.h" #include "arm-spe.h" +#include "s390-cpumsf.h" #include "sane_ctype.h" #include "symbol/kallsyms.h" @@ -202,6 +203,9 @@ static int auxtrace_queues__grow(struct auxtrace_queues *queues, for (i = 0; i < queues->nr_queues; i++) { list_splice_tail(&queues->queue_array[i].head, &queue_array[i].head); + queue_array[i].tid = queues->queue_array[i].tid; + queue_array[i].cpu = queues->queue_array[i].cpu; + queue_array[i].set = queues->queue_array[i].set; queue_array[i].priv = queues->queue_array[i].priv; } @@ -902,9 +906,8 @@ out_free: return err; } -int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_auxtrace_info(struct perf_session *session, + union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; @@ -920,15 +923,16 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, return arm_spe_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: return cs_etm__process_auxtrace_info(event, session); + case PERF_AUXTRACE_S390_CPUMSF: + return s390_cpumsf_process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; } } -s64 perf_event__process_auxtrace(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session) +s64 perf_event__process_auxtrace(struct perf_session *session, + union perf_event *event) { s64 err; @@ -944,7 +948,7 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE) return -EINVAL; - err = session->auxtrace->process_auxtrace_event(session, event, tool); + err = session->auxtrace->process_auxtrace_event(session, event, session->tool); if (err < 0) return err; @@ -958,16 +962,23 @@ s64 perf_event__process_auxtrace(struct perf_tool *tool, #define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64 #define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024 -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample) { - synth_opts->instructions = true; synth_opts->branches = true; synth_opts->transactions = true; synth_opts->ptwrites = true; synth_opts->pwr_events = true; synth_opts->errors = true; - synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; - synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + if (no_sample) { + synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS; + synth_opts->period = 1; + synth_opts->calls = true; + } else { + synth_opts->instructions = true; + synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; + synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + } synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ; synth_opts->initial_skip = 0; @@ -995,7 +1006,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, } if (!str) { - itrace_synth_opts__set_default(synth_opts); + itrace_synth_opts__set_default(synth_opts, false); return 0; } @@ -1179,9 +1190,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats) } } -int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_auxtrace_error(struct perf_session *session, + union perf_event *event) { if (auxtrace__dont_decode(session)) return 0; @@ -1190,11 +1200,12 @@ int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused, return 0; } -static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, +static int __auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, bool snapshot, size_t snapshot_size) { + struct auxtrace_mmap *mm = &map->auxtrace_mmap; u64 head, old = mm->prev, offset, ref; unsigned char *data = mm->base; size_t size, head_off, old_off, len1, len2, padding; @@ -1281,7 +1292,7 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, ev.auxtrace.tid = mm->tid; ev.auxtrace.cpu = mm->cpu; - if (fn(tool, &ev, data1, len1, data2, len2)) + if (fn(tool, map, &ev, data1, len1, data2, len2)) return -1; mm->prev = head; @@ -1300,18 +1311,18 @@ static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, return 1; } -int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn) { - return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0); + return __auxtrace_mmap__read(map, itr, tool, fn, false, 0); } -int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm, +int auxtrace_mmap__read_snapshot(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size) { - return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size); + return __auxtrace_mmap__read(map, itr, tool, fn, true, snapshot_size); } /** @@ -1679,7 +1690,7 @@ struct sym_args { static bool kern_sym_match(struct sym_args *args, const char *name, char type) { /* A function with the same name, and global or the n'th found or any */ - return symbol_type__is_a(type, MAP__FUNCTION) && + return kallsyms__is_function(type) && !strcmp(name, args->name) && ((args->global && isupper(type)) || (args->selected && ++(args->cnt) == args->idx) || @@ -1784,7 +1795,7 @@ static int find_entire_kern_cb(void *arg, const char *name __maybe_unused, { struct sym_args *args = arg; - if (!symbol_type__is_a(type, MAP__FUNCTION)) + if (!kallsyms__is_function(type)) return 0; if (!args->started) { @@ -1915,7 +1926,7 @@ static void print_duplicate_syms(struct dso *dso, const char *sym_name) pr_err("Multiple symbols with name '%s'\n", sym_name); - sym = dso__first_symbol(dso, MAP__FUNCTION); + sym = dso__first_symbol(dso); while (sym) { if (dso_sym_match(sym, sym_name, &cnt, -1)) { pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n", @@ -1945,7 +1956,7 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, *start = 0; *size = 0; - sym = dso__first_symbol(dso, MAP__FUNCTION); + sym = dso__first_symbol(dso); while (sym) { if (*start) { if (!*size) @@ -1972,8 +1983,8 @@ static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start, static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso) { - struct symbol *first_sym = dso__first_symbol(dso, MAP__FUNCTION); - struct symbol *last_sym = dso__last_symbol(dso, MAP__FUNCTION); + struct symbol *first_sym = dso__first_symbol(dso); + struct symbol *last_sym = dso__last_symbol(dso); if (!first_sym || !last_sym) { pr_err("Failed to determine filter for %s\nNo symbols found.\n", diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e731f55da072..8e50f96d4b23 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -23,6 +23,7 @@ #include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> +#include <asm/bitsperlong.h> #include "../perf.h" #include "event.h" @@ -33,6 +34,7 @@ union perf_event; struct perf_session; struct perf_evlist; struct perf_tool; +struct perf_mmap; struct option; struct record_opts; struct auxtrace_info_event; @@ -44,6 +46,7 @@ enum auxtrace_type { PERF_AUXTRACE_INTEL_BTS, PERF_AUXTRACE_CS_ETM, PERF_AUXTRACE_ARM_SPE, + PERF_AUXTRACE_S390_CPUMSF, }; enum itrace_period_type { @@ -55,6 +58,7 @@ enum itrace_period_type { /** * struct itrace_synth_opts - AUX area tracing synthesis options. * @set: indicates whether or not options have been set + * @default_no_sample: Default to no sampling. * @inject: indicates the event (not just the sample) must be fully synthesized * because 'perf inject' will write it out * @instructions: whether to synthesize 'instructions' events @@ -79,6 +83,7 @@ enum itrace_period_type { */ struct itrace_synth_opts { bool set; + bool default_no_sample; bool inject; bool instructions; bool branches; @@ -433,13 +438,14 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, bool per_cpu); typedef int (*process_auxtrace_t)(struct perf_tool *tool, + struct perf_mmap *map, union perf_event *event, void *data1, size_t len1, void *data2, size_t len2); -int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr, +int auxtrace_mmap__read(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn); -int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm, +int auxtrace_mmap__read_snapshot(struct perf_mmap *map, struct auxtrace_record *itr, struct perf_tool *tool, process_auxtrace_t fn, size_t snapshot_size); @@ -516,18 +522,16 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process); -int perf_event__process_auxtrace_info(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); -s64 perf_event__process_auxtrace(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); -int perf_event__process_auxtrace_error(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_auxtrace_info(struct perf_session *session, + union perf_event *event); +s64 perf_event__process_auxtrace(struct perf_session *session, + union perf_event *event); +int perf_event__process_auxtrace_error(struct perf_session *session, + union perf_event *event); int itrace_parse_synth_opts(const struct option *opt, const char *str, int unset); -void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts); +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts, + bool no_sample); size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp); void perf_session__auxtrace_error_inc(struct perf_session *session, @@ -576,6 +580,23 @@ static inline void auxtrace__free(struct perf_session *session) return session->auxtrace->free(session); } +#define ITRACE_HELP \ +" i: synthesize instructions events\n" \ +" b: synthesize branches events\n" \ +" c: synthesize branches events (calls only)\n" \ +" r: synthesize branches events (returns only)\n" \ +" x: synthesize transactions events\n" \ +" w: synthesize ptwrite events\n" \ +" p: synthesize power events\n" \ +" e: synthesize error events\n" \ +" d: create a debug log\n" \ +" g[len]: synthesize a call chain (use with i or x)\n" \ +" l[len]: synthesize last branch entries (use with i or x)\n" \ +" sNUMBER: skip initial number of events\n" \ +" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ +" concatenate multiple options. Default is ibxwpe or cewp\n" + + #else static inline struct auxtrace_record * @@ -716,6 +737,8 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, struct perf_evlist *evlist, int idx, bool per_cpu); +#define ITRACE_HELP "" + #endif #endif diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index cee658733e2c..f9ae1a993806 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -747,7 +747,9 @@ int bpf__load(struct bpf_object *obj) err = bpf_object__load(obj); if (err) { - pr_debug("bpf: load objects failed\n"); + char bf[128]; + libbpf_strerror(err, bf, sizeof(bf)); + pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf); return err; } return 0; @@ -1527,13 +1529,13 @@ int bpf__apply_obj_config(void) bpf_object__for_each_safe(obj, objtmp) \ bpf_map__for_each(pos, obj) -#define bpf__for_each_stdout_map(pos, obj, objtmp) \ +#define bpf__for_each_map_named(pos, obj, objtmp, name) \ bpf__for_each_map(pos, obj, objtmp) \ if (bpf_map__name(pos) && \ - (strcmp("__bpf_stdout__", \ + (strcmp(name, \ bpf_map__name(pos)) == 0)) -int bpf__setup_stdout(struct perf_evlist *evlist) +struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const char *name) { struct bpf_map_priv *tmpl_priv = NULL; struct bpf_object *obj, *tmp; @@ -1542,11 +1544,11 @@ int bpf__setup_stdout(struct perf_evlist *evlist) int err; bool need_init = false; - bpf__for_each_stdout_map(map, obj, tmp) { + bpf__for_each_map_named(map, obj, tmp, name) { struct bpf_map_priv *priv = bpf_map__priv(map); if (IS_ERR(priv)) - return -BPF_LOADER_ERRNO__INTERNAL; + return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL); /* * No need to check map type: type should have been @@ -1559,49 +1561,61 @@ int bpf__setup_stdout(struct perf_evlist *evlist) } if (!need_init) - return 0; + return NULL; if (!tmpl_priv) { - err = parse_events(evlist, "bpf-output/no-inherit=1,name=__bpf_stdout__/", - NULL); + char *event_definition = NULL; + + if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0) + return ERR_PTR(-ENOMEM); + + err = parse_events(evlist, event_definition, NULL); + free(event_definition); + if (err) { - pr_debug("ERROR: failed to create bpf-output event\n"); - return -err; + pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name); + return ERR_PTR(-err); } evsel = perf_evlist__last(evlist); } - bpf__for_each_stdout_map(map, obj, tmp) { + bpf__for_each_map_named(map, obj, tmp, name) { struct bpf_map_priv *priv = bpf_map__priv(map); if (IS_ERR(priv)) - return -BPF_LOADER_ERRNO__INTERNAL; + return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL); if (priv) continue; if (tmpl_priv) { priv = bpf_map_priv__clone(tmpl_priv); if (!priv) - return -ENOMEM; + return ERR_PTR(-ENOMEM); err = bpf_map__set_priv(map, priv, bpf_map_priv__clear); if (err) { bpf_map_priv__clear(map, priv); - return err; + return ERR_PTR(err); } } else if (evsel) { struct bpf_map_op *op; op = bpf_map__add_newop(map, NULL); if (IS_ERR(op)) - return PTR_ERR(op); + return ERR_PTR(PTR_ERR(op)); op->op_type = BPF_MAP_OP_SET_EVSEL; op->v.evsel = evsel; } } - return 0; + return evsel; +} + +int bpf__setup_stdout(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__"); + return PTR_ERR_OR_ZERO(evsel); } #define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START) @@ -1778,8 +1792,8 @@ int bpf__strerror_apply_obj_config(int err, char *buf, size_t size) return 0; } -int bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused, - int err, char *buf, size_t size) +int bpf__strerror_setup_output_event(struct perf_evlist *evlist __maybe_unused, + int err, char *buf, size_t size) { bpf__strerror_head(err, buf, size); bpf__strerror_end(buf, size); diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 5d3aefd6fae7..62d245a90e1d 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h @@ -43,6 +43,7 @@ enum bpf_loader_errno { __BPF_LOADER_ERRNO__END, }; +struct perf_evsel; struct bpf_object; struct parse_events_term; #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" @@ -82,9 +83,8 @@ int bpf__apply_obj_config(void); int bpf__strerror_apply_obj_config(int err, char *buf, size_t size); int bpf__setup_stdout(struct perf_evlist *evlist); -int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, - char *buf, size_t size); - +struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const char *name); +int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); #else #include <errno.h> @@ -138,6 +138,12 @@ bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) return 0; } +static inline struct perf_evsel * +bpf__setup_output_event(struct perf_evlist *evlist __maybe_unused, const char *name __maybe_unused) +{ + return NULL; +} + static inline int __bpf_strerror(char *buf, size_t size) { @@ -193,11 +199,16 @@ bpf__strerror_apply_obj_config(int err __maybe_unused, } static inline int -bpf__strerror_setup_stdout(struct perf_evlist *evlist __maybe_unused, - int err __maybe_unused, char *buf, - size_t size) +bpf__strerror_setup_output_event(struct perf_evlist *evlist __maybe_unused, + int err __maybe_unused, char *buf, size_t size) { return __bpf_strerror(buf, size); } + #endif + +static inline int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, char *buf, size_t size) +{ + return bpf__strerror_setup_output_event(evlist, err, buf, size); +} #endif diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c index 29347756b0af..77e4891e17b0 100644 --- a/tools/perf/util/bpf-prologue.c +++ b/tools/perf/util/bpf-prologue.c @@ -61,7 +61,7 @@ check_pos(struct bpf_insn_pos *pos) /* * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see - * Documentation/trace/kprobetrace.txt) to size field of BPF_LDX_MEM + * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM * instruction (BPF_{B,H,W,DW}). */ static int diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 537eadd81914..04b1d53e4bf9 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -47,9 +47,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, return -1; } - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al); - - if (al.map != NULL) + if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) al.map->dso->hit = 1; thread__put(thread); diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index bf31ceab33bd..89512504551b 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -146,8 +146,15 @@ getBPFObjectFromModule(llvm::Module *Module) raw_svector_ostream ostream(*Buffer); legacy::PassManager PM; - if (TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile)) { + bool NotAdded; +#if CLANG_VERSION_MAJOR < 7 + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, + TargetMachine::CGFT_ObjectFile); +#else + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, + TargetMachine::CGFT_ObjectFile); +#endif + if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);; } diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index decb91f9da82..ccd02634a616 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -93,20 +93,17 @@ static int open_cgroup(const char *name) static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str) { struct perf_evsel *counter; - struct cgroup *cgrp = NULL; /* * check if cgrp is already defined, if so we reuse it */ evlist__for_each_entry(evlist, counter) { if (!counter->cgrp) continue; - if (!strcmp(counter->cgrp->name, str)) { - cgrp = cgroup__get(counter->cgrp); - break; - } + if (!strcmp(counter->cgrp->name, str)) + return cgroup__get(counter->cgrp); } - return cgrp; + return NULL; } static struct cgroup *cgroup__new(const char *name) diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 7798a2cc8a86..31279a7bd919 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -20,9 +20,10 @@ static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,} static struct comm_str *comm_str__get(struct comm_str *cs) { - if (cs) - refcount_inc(&cs->refcnt); - return cs; + if (cs && refcount_inc_not_zero(&cs->refcnt)) + return cs; + + return NULL; } static void comm_str__put(struct comm_str *cs) @@ -67,9 +68,14 @@ struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root) parent = *p; iter = rb_entry(parent, struct comm_str, rb_node); + /* + * If we race with comm_str__put, iter->refcnt is 0 + * and it will be removed within comm_str__put call + * shortly, ignore it in this search. + */ cmp = strcmp(str, iter->str); - if (!cmp) - return comm_str__get(iter); + if (!cmp && comm_str__get(iter)) + return iter; if (cmp < 0) p = &(*p)->rb_left; diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index ecca688a25fb..892e92e7e7fc 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -4,10 +4,12 @@ #ifdef HAVE_ZLIB_SUPPORT int gzip_decompress_to_file(const char *input, int output_fd); +bool gzip_is_compressed(const char *input); #endif #ifdef HAVE_LZMA_SUPPORT int lzma_decompress_to_file(const char *input, int output_fd); +bool lzma_is_compressed(const char *input); #endif #endif /* PERF_COMPRESS_H */ diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 84eb9393c7db..5ac157056cdf 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -707,6 +707,14 @@ struct perf_config_set *perf_config_set__new(void) return set; } +static int perf_config__init(void) +{ + if (config_set == NULL) + config_set = perf_config_set__new(); + + return config_set == NULL; +} + int perf_config(config_fn_t fn, void *data) { int ret = 0; @@ -714,7 +722,7 @@ int perf_config(config_fn_t fn, void *data) struct perf_config_section *section; struct perf_config_item *item; - if (config_set == NULL) + if (config_set == NULL && perf_config__init()) return -1; perf_config_set__for_each_entry(config_set, section, item) { @@ -735,12 +743,6 @@ int perf_config(config_fn_t fn, void *data) return ret; } -void perf_config__init(void) -{ - if (config_set == NULL) - config_set = perf_config_set__new(); -} - void perf_config__exit(void) { perf_config_set__delete(config_set); diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index baf82bf227ac..bd0a5897c76a 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -38,7 +38,6 @@ struct perf_config_set *perf_config_set__new(void); void perf_config_set__delete(struct perf_config_set *set); int perf_config_set__collect(struct perf_config_set *set, const char *file_name, const char *var, const char *value); -void perf_config__init(void); void perf_config__exit(void); void perf_config__refresh(void); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 4d5fc374e730..938def6d0bb9 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -31,6 +31,8 @@ #endif #endif +#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL + struct cs_etm_decoder { void *data; void (*packet_printer)(const char *msg); @@ -261,8 +263,8 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) decoder->tail = 0; decoder->packet_count = 0; for (i = 0; i < MAX_BUFFER; i++) { - decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; + decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; decoder->packet_buffer[i].last_instr_taken_branch = false; decoder->packet_buffer[i].exc = false; decoder->packet_buffer[i].exc_ret = false; @@ -295,8 +297,8 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, decoder->packet_buffer[et].exc = false; decoder->packet_buffer[et].exc_ret = false; decoder->packet_buffer[et].cpu = *((int *)inode->priv); - decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL; - decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; + decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; if (decoder->packet_count == MAX_BUFFER - 1) return OCSD_RESP_WAIT; diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index 743f5f444304..612b5755f742 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -23,6 +23,7 @@ struct cs_etm_buffer { }; enum cs_etm_sample_type { + CS_ETM_EMPTY = 0, CS_ETM_RANGE = 1 << 0, CS_ETM_TRACE_ON = 1 << 1, }; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index bf16dc9ee507..73430b73570d 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -244,6 +244,27 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address) +{ + struct machine *machine; + + machine = etmq->etm->machine; + + if (address >= etmq->etm->kernel_start) { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_KERNEL; + else + return PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (machine__is_host(machine)) + return PERF_RECORD_MISC_USER; + else if (perf_guest) + return PERF_RECORD_MISC_GUEST_USER; + else + return PERF_RECORD_MISC_HYPERVISOR; + } +} + static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, size_t size, u8 *buffer) { @@ -258,10 +279,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, return -1; machine = etmq->etm->machine; - if (address >= etmq->etm->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = cs_etm__cpu_mode(etmq, address); thread = etmq->thread; if (!thread) { @@ -270,9 +288,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, thread = etmq->etm->unknown_thread; } - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al); - - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso) return 0; if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && @@ -496,6 +512,10 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) { + /* Returns 0 for the CS_ETM_TRACE_ON packet */ + if (packet->sample_type == CS_ETM_TRACE_ON) + return 0; + /* * The packet records the execution range with an exclusive end address * @@ -507,6 +527,15 @@ static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet) return packet->end_addr - A64_INSTR_SIZE; } +static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet) +{ + /* Returns 0 for the CS_ETM_TRACE_ON packet */ + if (packet->sample_type == CS_ETM_TRACE_ON) + return 0; + + return packet->start_addr; +} + static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet) { /* @@ -548,7 +577,7 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) be = &bs->entries[etmq->last_branch_pos]; be->from = cs_etm__last_executed_instr(etmq->prev_packet); - be->to = etmq->packet->start_addr; + be->to = cs_etm__first_executed_instr(etmq->packet); /* No support for mispredict */ be->flags.mispred = 0; be->flags.predicted = 1; @@ -642,7 +671,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct perf_sample sample = {.ip = 0,}; event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, addr); event->sample.header.size = sizeof(struct perf_event_header); sample.ip = addr; @@ -654,7 +683,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, sample.cpu = etmq->packet->cpu; sample.flags = 0; sample.insn_len = 1; - sample.cpumode = event->header.misc; + sample.cpumode = event->sample.header.misc; if (etm->synth_opts.last_branch) { cs_etm__copy_last_branch_rb(etmq); @@ -695,21 +724,24 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) u64 nr; struct branch_entry entries; } dummy_bs; + u64 ip; + + ip = cs_etm__last_executed_instr(etmq->prev_packet); event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); event->sample.header.size = sizeof(struct perf_event_header); - sample.ip = cs_etm__last_executed_instr(etmq->prev_packet); + sample.ip = ip; sample.pid = etmq->pid; sample.tid = etmq->tid; - sample.addr = etmq->packet->start_addr; + sample.addr = cs_etm__first_executed_instr(etmq->packet); sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; sample.period = 1; sample.cpu = etmq->packet->cpu; sample.flags = 0; - sample.cpumode = PERF_RECORD_MISC_USER; + sample.cpumode = event->sample.header.misc; /* * perf report cannot handle events without a branch stack @@ -899,13 +931,23 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) etmq->period_instructions = instrs_over; } - if (etm->sample_branches && - etmq->prev_packet && - etmq->prev_packet->sample_type == CS_ETM_RANGE && - etmq->prev_packet->last_instr_taken_branch) { - ret = cs_etm__synth_branch_sample(etmq); - if (ret) - return ret; + if (etm->sample_branches && etmq->prev_packet) { + bool generate_sample = false; + + /* Generate sample for tracing on packet */ + if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON) + generate_sample = true; + + /* Generate sample for branch taken packet */ + if (etmq->prev_packet->sample_type == CS_ETM_RANGE && + etmq->prev_packet->last_instr_taken_branch) + generate_sample = true; + + if (generate_sample) { + ret = cs_etm__synth_branch_sample(etmq); + if (ret) + return ret; + } } if (etm->sample_branches || etm->synth_opts.last_branch) { @@ -924,10 +966,17 @@ static int cs_etm__sample(struct cs_etm_queue *etmq) static int cs_etm__flush(struct cs_etm_queue *etmq) { int err = 0; + struct cs_etm_auxtrace *etm = etmq->etm; struct cs_etm_packet *tmp; + if (!etmq->prev_packet) + return 0; + + /* Handle start tracing packet */ + if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) + goto swap_packet; + if (etmq->etm->synth_opts.last_branch && - etmq->prev_packet && etmq->prev_packet->sample_type == CS_ETM_RANGE) { /* * Generate a last branch event for the branches left in the @@ -941,8 +990,22 @@ static int cs_etm__flush(struct cs_etm_queue *etmq) err = cs_etm__synth_instruction_sample( etmq, addr, etmq->period_instructions); + if (err) + return err; + etmq->period_instructions = 0; + } + + if (etm->sample_branches && + etmq->prev_packet->sample_type == CS_ETM_RANGE) { + err = cs_etm__synth_branch_sample(etmq); + if (err) + return err; + } + +swap_packet: + if (etmq->etm->synth_opts.last_branch) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. @@ -1022,6 +1085,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) */ cs_etm__flush(etmq); break; + case CS_ETM_EMPTY: + /* + * Should not receive empty packet, + * report error. + */ + pr_err("CS ETM Trace: empty packet\n"); + return -EINVAL; default: break; } @@ -1383,7 +1453,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { etm->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&etm->synth_opts); + itrace_synth_opts__set_default(&etm->synth_opts, + session->itrace_synth_opts->default_no_sample); etm->synth_opts.callchain = false; } diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 5744c12641a5..2a36fab76994 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -182,20 +182,20 @@ err_put_field: } static struct bt_ctf_field_type* -get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) +get_tracepoint_field_type(struct ctf_writer *cw, struct tep_format_field *field) { unsigned long flags = field->flags; - if (flags & FIELD_IS_STRING) + if (flags & TEP_FIELD_IS_STRING) return cw->data.string; - if (!(flags & FIELD_IS_SIGNED)) { + if (!(flags & TEP_FIELD_IS_SIGNED)) { /* unsigned long are mostly pointers */ - if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER) + if (flags & TEP_FIELD_IS_LONG || flags & TEP_FIELD_IS_POINTER) return cw->data.u64_hex; } - if (flags & FIELD_IS_SIGNED) { + if (flags & TEP_FIELD_IS_SIGNED) { if (field->size == 8) return cw->data.s64; else @@ -287,7 +287,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, struct perf_sample *sample, - struct format_field *fmtf) + struct tep_format_field *fmtf) { struct bt_ctf_field_type *type; struct bt_ctf_field *array_field; @@ -304,20 +304,20 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, name = fmtf->alias; offset = fmtf->offset; len = fmtf->size; - if (flags & FIELD_IS_STRING) - flags &= ~FIELD_IS_ARRAY; + if (flags & TEP_FIELD_IS_STRING) + flags &= ~TEP_FIELD_IS_ARRAY; - if (flags & FIELD_IS_DYNAMIC) { + if (flags & TEP_FIELD_IS_DYNAMIC) { unsigned long long tmp_val; - tmp_val = pevent_read_number(fmtf->event->pevent, - data + offset, len); + tmp_val = tep_read_number(fmtf->event->pevent, + data + offset, len); offset = tmp_val; len = offset >> 16; offset &= 0xffff; } - if (flags & FIELD_IS_ARRAY) { + if (flags & TEP_FIELD_IS_ARRAY) { type = bt_ctf_event_class_get_field_by_name( event_class, name); @@ -338,7 +338,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, type = get_tracepoint_field_type(cw, fmtf); for (i = 0; i < n_items; i++) { - if (flags & FIELD_IS_ARRAY) + if (flags & TEP_FIELD_IS_ARRAY) field = bt_ctf_field_array_get_field(array_field, i); else field = bt_ctf_field_create(type); @@ -348,16 +348,16 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, return -1; } - if (flags & FIELD_IS_STRING) + if (flags & TEP_FIELD_IS_STRING) ret = string_set_value(field, data + offset + i * len); else { unsigned long long value_int; - value_int = pevent_read_number( + value_int = tep_read_number( fmtf->event->pevent, data + offset + i * len, len); - if (!(flags & FIELD_IS_SIGNED)) + if (!(flags & TEP_FIELD_IS_SIGNED)) ret = bt_ctf_field_unsigned_integer_set_value( field, value_int); else @@ -369,7 +369,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, pr_err("failed to set file value %s\n", name); goto err_put_field; } - if (!(flags & FIELD_IS_ARRAY)) { + if (!(flags & TEP_FIELD_IS_ARRAY)) { ret = bt_ctf_event_set_payload(event, name, field); if (ret) { pr_err("failed to set payload %s\n", name); @@ -378,7 +378,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw, } bt_ctf_field_put(field); } - if (flags & FIELD_IS_ARRAY) { + if (flags & TEP_FIELD_IS_ARRAY) { ret = bt_ctf_event_set_payload(event, name, array_field); if (ret) { pr_err("Failed add payload array %s\n", name); @@ -396,10 +396,10 @@ err_put_field: static int add_tracepoint_fields_values(struct ctf_writer *cw, struct bt_ctf_event_class *event_class, struct bt_ctf_event *event, - struct format_field *fields, + struct tep_format_field *fields, struct perf_sample *sample) { - struct format_field *field; + struct tep_format_field *field; int ret; for (field = fields; field; field = field->next) { @@ -417,8 +417,8 @@ static int add_tracepoint_values(struct ctf_writer *cw, struct perf_evsel *evsel, struct perf_sample *sample) { - struct format_field *common_fields = evsel->tp_format->format.common_fields; - struct format_field *fields = evsel->tp_format->format.fields; + struct tep_format_field *common_fields = evsel->tp_format->format.common_fields; + struct tep_format_field *fields = evsel->tp_format->format.fields; int ret; ret = add_tracepoint_fields_values(cw, event_class, event, @@ -970,7 +970,7 @@ out: static int event_class_add_field(struct bt_ctf_event_class *event_class, struct bt_ctf_field_type *type, - struct format_field *field) + struct tep_format_field *field) { struct bt_ctf_field_type *t = NULL; char *name; @@ -1009,10 +1009,10 @@ static int event_class_add_field(struct bt_ctf_event_class *event_class, } static int add_tracepoint_fields_types(struct ctf_writer *cw, - struct format_field *fields, + struct tep_format_field *fields, struct bt_ctf_event_class *event_class) { - struct format_field *field; + struct tep_format_field *field; int ret; for (field = fields; field; field = field->next) { @@ -1030,15 +1030,15 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw, * type and don't care that it is an array. What we don't * support is an array of strings. */ - if (flags & FIELD_IS_STRING) - flags &= ~FIELD_IS_ARRAY; + if (flags & TEP_FIELD_IS_STRING) + flags &= ~TEP_FIELD_IS_ARRAY; - if (flags & FIELD_IS_ARRAY) + if (flags & TEP_FIELD_IS_ARRAY) type = bt_ctf_field_type_array_create(type, field->arraylen); ret = event_class_add_field(event_class, type, field); - if (flags & FIELD_IS_ARRAY) + if (flags & TEP_FIELD_IS_ARRAY) bt_ctf_field_type_put(type); if (ret) { @@ -1055,8 +1055,8 @@ static int add_tracepoint_types(struct ctf_writer *cw, struct perf_evsel *evsel, struct bt_ctf_event_class *class) { - struct format_field *common_fields = evsel->tp_format->format.common_fields; - struct format_field *fields = evsel->tp_format->format.fields; + struct tep_format_field *common_fields = evsel->tp_format->format.common_fields; + struct tep_format_field *fields = evsel->tp_format->format.fields; int ret; ret = add_tracepoint_fields_types(cw, common_fields, class); @@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, { struct perf_session *session; struct perf_data data = { - .file.path = input, + .file = { .path = input, .fd = -1 }, .mode = PERF_DATA_MODE_READ, .force = opts->force, }; diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index b0c2b5c5d337..69fbb0a72d0c 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -247,9 +247,9 @@ static int db_ids_from_al(struct db_export *dbe, struct addr_location *al, *dso_db_id = dso->db_id; if (!al->sym) { - al->sym = symbol__new(al->addr, 0, 0, "unknown"); + al->sym = symbol__new(al->addr, 0, 0, 0, "unknown"); if (al->sym) - dso__insert_symbol(dso, al->map->type, al->sym); + dso__insert_symbol(dso, al->sym); } if (al->sym) { @@ -315,8 +315,7 @@ static struct call_path *call_path_from_sample(struct db_export *dbe, al.addr = node->ip; if (al.map && !al.sym) - al.sym = dso__find_symbol(al.map->dso, MAP__FUNCTION, - al.addr); + al.sym = dso__find_symbol(al.map->dso, al.addr); db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset); @@ -464,6 +463,28 @@ int db_export__branch_types(struct db_export *dbe) if (err) break; } + + /* Add trace begin / end variants */ + for (i = 0; branch_types[i].name ; i++) { + const char *name = branch_types[i].name; + u32 type = branch_types[i].branch_type; + char buf[64]; + + if (type == PERF_IP_FLAG_BRANCH || + (type & (PERF_IP_FLAG_TRACE_BEGIN | PERF_IP_FLAG_TRACE_END))) + continue; + + snprintf(buf, sizeof(buf), "trace begin / %s", name); + err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_BEGIN, buf); + if (err) + break; + + snprintf(buf, sizeof(buf), "%s / trace end", name); + err = db_export__branch_type(dbe, type | PERF_IP_FLAG_TRACE_END, buf); + if (err) + break; + } + return err; } diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 36ef45b2e89d..bbed90e5d9bb 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -189,28 +189,34 @@ int dso__read_binary_type_filename(const struct dso *dso, return ret; } +enum { + COMP_ID__NONE = 0, +}; + static const struct { const char *fmt; int (*decompress)(const char *input, int output); + bool (*is_compressed)(const char *input); } compressions[] = { + [COMP_ID__NONE] = { .fmt = NULL, }, #ifdef HAVE_ZLIB_SUPPORT - { "gz", gzip_decompress_to_file }, + { "gz", gzip_decompress_to_file, gzip_is_compressed }, #endif #ifdef HAVE_LZMA_SUPPORT - { "xz", lzma_decompress_to_file }, + { "xz", lzma_decompress_to_file, lzma_is_compressed }, #endif - { NULL, NULL }, + { NULL, NULL, NULL }, }; -bool is_supported_compression(const char *ext) +static int is_supported_compression(const char *ext) { unsigned i; - for (i = 0; compressions[i].fmt; i++) { + for (i = 1; compressions[i].fmt; i++) { if (!strcmp(ext, compressions[i].fmt)) - return true; + return i; } - return false; + return COMP_ID__NONE; } bool is_kernel_module(const char *pathname, int cpumode) @@ -239,80 +245,73 @@ bool is_kernel_module(const char *pathname, int cpumode) return m.kmod; } -bool decompress_to_file(const char *ext, const char *filename, int output_fd) -{ - unsigned i; - - for (i = 0; compressions[i].fmt; i++) { - if (!strcmp(ext, compressions[i].fmt)) - return !compressions[i].decompress(filename, - output_fd); - } - return false; -} - bool dso__needs_decompress(struct dso *dso) { return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; } -static int decompress_kmodule(struct dso *dso, const char *name, char *tmpbuf) +static int decompress_kmodule(struct dso *dso, const char *name, + char *pathname, size_t len) { + char tmpbuf[] = KMOD_DECOMP_NAME; int fd = -1; - struct kmod_path m; if (!dso__needs_decompress(dso)) return -1; - if (kmod_path__parse_ext(&m, dso->long_name)) + if (dso->comp == COMP_ID__NONE) return -1; - if (!m.comp) - goto out; + /* + * We have proper compression id for DSO and yet the file + * behind the 'name' can still be plain uncompressed object. + * + * The reason is behind the logic we open the DSO object files, + * when we try all possible 'debug' objects until we find the + * data. So even if the DSO is represented by 'krava.xz' module, + * we can end up here opening ~/.debug/....23432432/debug' file + * which is not compressed. + * + * To keep this transparent, we detect this and return the file + * descriptor to the uncompressed file. + */ + if (!compressions[dso->comp].is_compressed(name)) + return open(name, O_RDONLY); fd = mkstemp(tmpbuf); if (fd < 0) { dso->load_errno = errno; - goto out; + return -1; } - if (!decompress_to_file(m.ext, name, fd)) { + if (compressions[dso->comp].decompress(name, fd)) { dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; close(fd); fd = -1; } -out: - free(m.ext); + if (!pathname || (fd < 0)) + unlink(tmpbuf); + + if (pathname && (fd >= 0)) + strncpy(pathname, tmpbuf, len); + return fd; } int dso__decompress_kmodule_fd(struct dso *dso, const char *name) { - char tmpbuf[] = KMOD_DECOMP_NAME; - int fd; - - fd = decompress_kmodule(dso, name, tmpbuf); - unlink(tmpbuf); - return fd; + return decompress_kmodule(dso, name, NULL, 0); } int dso__decompress_kmodule_path(struct dso *dso, const char *name, char *pathname, size_t len) { - char tmpbuf[] = KMOD_DECOMP_NAME; - int fd; - - fd = decompress_kmodule(dso, name, tmpbuf); - if (fd < 0) { - unlink(tmpbuf); - return -1; - } + int fd = decompress_kmodule(dso, name, pathname, len); - strncpy(pathname, tmpbuf, len); close(fd); - return 0; + return fd >= 0 ? 0 : -1; } /* @@ -332,7 +331,7 @@ int dso__decompress_kmodule_path(struct dso *dso, const char *name, * Returns 0 if there's no strdup error, -ENOMEM otherwise. */ int __kmod_path__parse(struct kmod_path *m, const char *path, - bool alloc_name, bool alloc_ext) + bool alloc_name) { const char *name = strrchr(path, '/'); const char *ext = strrchr(path, '.'); @@ -354,6 +353,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) || (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) || (strncmp(name, "[vdso]", 6) == 0) || + (strncmp(name, "[vdso32]", 8) == 0) || + (strncmp(name, "[vdsox32]", 9) == 0) || (strncmp(name, "[vsyscall]", 10) == 0)) { m->kmod = false; @@ -370,10 +371,9 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, return 0; } - if (is_supported_compression(ext + 1)) { - m->comp = true; + m->comp = is_supported_compression(ext + 1); + if (m->comp > COMP_ID__NONE) ext -= 3; - } /* Check .ko extension only if there's enough name left. */ if (ext > name) @@ -391,14 +391,6 @@ int __kmod_path__parse(struct kmod_path *m, const char *path, strxfrchar(m->name, '-', '_'); } - if (alloc_ext && m->comp) { - m->ext = strdup(ext + 4); - if (!m->ext) { - free((void *) m->name); - return -ENOMEM; - } - } - return 0; } @@ -411,8 +403,10 @@ void dso__set_module_info(struct dso *dso, struct kmod_path *m, dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; /* _KMODULE_COMP should be next to _KMODULE */ - if (m->kmod && m->comp) + if (m->kmod && m->comp) { dso->symtab_type++; + dso->comp = m->comp; + } dso__set_short_name(dso, strdup(m->name), true); } @@ -466,6 +460,7 @@ static int __open_dso(struct dso *dso, struct machine *machine) int fd = -EINVAL; char *root_dir = (char *)""; char *name = malloc(PATH_MAX); + bool decomp = false; if (!name) return -ENOMEM; @@ -489,12 +484,13 @@ static int __open_dso(struct dso *dso, struct machine *machine) goto out; } + decomp = true; strcpy(name, newpath); } fd = do_open(name); - if (dso__needs_decompress(dso)) + if (decomp) unlink(name); out: @@ -1014,7 +1010,7 @@ struct map *dso__new_map(const char *name) struct dso *dso = dso__new(name); if (dso) - map = map__new2(0, dso, MAP__FUNCTION); + map = map__new2(0, dso); return map; } @@ -1176,19 +1172,19 @@ int dso__name_len(const struct dso *dso) return dso->short_name_len; } -bool dso__loaded(const struct dso *dso, enum map_type type) +bool dso__loaded(const struct dso *dso) { - return dso->loaded & (1 << type); + return dso->loaded; } -bool dso__sorted_by_name(const struct dso *dso, enum map_type type) +bool dso__sorted_by_name(const struct dso *dso) { - return dso->sorted_by_name & (1 << type); + return dso->sorted_by_name; } -void dso__set_sorted_by_name(struct dso *dso, enum map_type type) +void dso__set_sorted_by_name(struct dso *dso) { - dso->sorted_by_name |= (1 << type); + dso->sorted_by_name = true; } struct dso *dso__new(const char *name) @@ -1196,12 +1192,10 @@ struct dso *dso__new(const char *name) struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1); if (dso != NULL) { - int i; strcpy(dso->name, name); dso__set_long_name(dso, dso->name, false); dso__set_short_name(dso, dso->name, false); - for (i = 0; i < MAP__NR_TYPES; ++i) - dso->symbols[i] = dso->symbol_names[i] = RB_ROOT; + dso->symbols = dso->symbol_names = RB_ROOT; dso->data.cache = RB_ROOT; dso->inlined_nodes = RB_ROOT; dso->srclines = RB_ROOT; @@ -1218,6 +1212,7 @@ struct dso *dso__new(const char *name) dso->a2l_fails = 1; dso->kernel = DSO_TYPE_USER; dso->needs_swap = DSO_SWAP__UNSET; + dso->comp = COMP_ID__NONE; RB_CLEAR_NODE(&dso->rb_node); dso->root = NULL; INIT_LIST_HEAD(&dso->node); @@ -1231,8 +1226,6 @@ struct dso *dso__new(const char *name) void dso__delete(struct dso *dso) { - int i; - if (!RB_EMPTY_NODE(&dso->rb_node)) pr_err("DSO %s is still in rbtree when being deleted!\n", dso->long_name); @@ -1240,8 +1233,7 @@ void dso__delete(struct dso *dso) /* free inlines first, as they reference symbols */ inlines__tree_delete(&dso->inlined_nodes); srcline__tree_delete(&dso->srclines); - for (i = 0; i < MAP__NR_TYPES; ++i) - symbols__delete(&dso->symbols[i]); + symbols__delete(&dso->symbols); if (dso->short_name_allocated) { zfree((char **)&dso->short_name); @@ -1451,9 +1443,7 @@ size_t __dsos__fprintf(struct list_head *head, FILE *fp) size_t ret = 0; list_for_each_entry(pos, head, node) { - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) - ret += dso__fprintf(pos, i, fp); + ret += dso__fprintf(pos, fp); } return ret; @@ -1467,18 +1457,17 @@ size_t dso__fprintf_buildid(struct dso *dso, FILE *fp) return fprintf(fp, "%s", sbuild_id); } -size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) +size_t dso__fprintf(struct dso *dso, FILE *fp) { struct rb_node *nd; size_t ret = fprintf(fp, "dso: %s (", dso->short_name); if (dso->short_name != dso->long_name) ret += fprintf(fp, "%s, ", dso->long_name); - ret += fprintf(fp, "%s, %sloaded, ", map_type__name[type], - dso__loaded(dso, type) ? "" : "NOT "); + ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT "); ret += dso__fprintf_buildid(dso, fp); ret += fprintf(fp, ")\n"); - for (nd = rb_first(&dso->symbols[type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); ret += symbol__fprintf(pos, fp); } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index c229dbe0277a..c5380500bed4 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -140,14 +140,14 @@ struct dso { struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root *root; /* root of rbtree that rb_node is in */ - struct rb_root symbols[MAP__NR_TYPES]; - struct rb_root symbol_names[MAP__NR_TYPES]; + struct rb_root symbols; + struct rb_root symbol_names; struct rb_root inlined_nodes; struct rb_root srclines; struct { u64 addr; struct symbol *symbol; - } last_find_result[MAP__NR_TYPES]; + } last_find_result; void *a2l; char *symsrc_filename; unsigned int a2l_fails; @@ -164,8 +164,8 @@ struct dso { u8 short_name_allocated:1; u8 long_name_allocated:1; u8 is_64_bit:1; - u8 sorted_by_name; - u8 loaded; + bool sorted_by_name; + bool loaded; u8 rel; u8 build_id[BUILD_ID_SIZE]; u64 text_offset; @@ -175,6 +175,7 @@ struct dso { u16 short_name_len; void *dwfl; /* DWARF debug info */ struct auxtrace_cache *auxtrace_cache; + int comp; /* dso data file */ struct { @@ -202,14 +203,13 @@ struct dso { * @dso: the 'struct dso *' in which symbols itereated * @pos: the 'struct symbol *' to use as a loop cursor * @n: the 'struct rb_node *' to use as a temporary storage - * @type: the 'enum map_type' type of symbols */ -#define dso__for_each_symbol(dso, pos, n, type) \ - symbols__for_each_entry(&(dso)->symbols[(type)], pos, n) +#define dso__for_each_symbol(dso, pos, n) \ + symbols__for_each_entry(&(dso)->symbols, pos, n) -static inline void dso__set_loaded(struct dso *dso, enum map_type type) +static inline void dso__set_loaded(struct dso *dso) { - dso->loaded |= (1 << type); + dso->loaded = true; } struct dso *dso__new(const char *name); @@ -231,11 +231,16 @@ static inline void __dso__zput(struct dso **dso) #define dso__zput(dso) __dso__zput(&dso) -bool dso__loaded(const struct dso *dso, enum map_type type); +bool dso__loaded(const struct dso *dso); -bool dso__sorted_by_name(const struct dso *dso, enum map_type type); -void dso__set_sorted_by_name(struct dso *dso, enum map_type type); -void dso__sort_by_name(struct dso *dso, enum map_type type); +static inline bool dso__has_symbols(const struct dso *dso) +{ + return !RB_EMPTY_ROOT(&dso->symbols); +} + +bool dso__sorted_by_name(const struct dso *dso); +void dso__set_sorted_by_name(struct dso *dso); +void dso__sort_by_name(struct dso *dso); void dso__set_build_id(struct dso *dso, void *build_id); bool dso__build_id_equal(const struct dso *dso, u8 *build_id); @@ -246,9 +251,7 @@ int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir); char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); -bool is_supported_compression(const char *ext); bool is_kernel_module(const char *pathname, int cpumode); -bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); int dso__decompress_kmodule_fd(struct dso *dso, const char *name); int dso__decompress_kmodule_path(struct dso *dso, const char *name, @@ -259,17 +262,15 @@ int dso__decompress_kmodule_path(struct dso *dso, const char *name, struct kmod_path { char *name; - char *ext; - bool comp; + int comp; bool kmod; }; int __kmod_path__parse(struct kmod_path *m, const char *path, - bool alloc_name, bool alloc_ext); + bool alloc_name); -#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false, false) -#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) -#define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) +#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false) +#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true) void dso__set_module_info(struct dso *dso, struct kmod_path *m, struct machine *machine); @@ -349,9 +350,8 @@ size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, size_t __dsos__fprintf(struct list_head *head, FILE *fp); size_t dso__fprintf_buildid(struct dso *dso, FILE *fp); -size_t dso__fprintf_symbols_by_name(struct dso *dso, - enum map_type type, FILE *fp); -size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp); +size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp); +size_t dso__fprintf(struct dso *dso, FILE *fp); static inline bool dso__is_vmlinux(struct dso *dso) { diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4c842762e3f2..59f38c7693f8 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -93,6 +93,37 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +static int perf_env__read_arch(struct perf_env *env) +{ + struct utsname uts; + + if (env->arch) + return 0; + + if (!uname(&uts)) + env->arch = strdup(uts.machine); + + return env->arch ? 0 : -ENOMEM; +} + +static int perf_env__read_nr_cpus_avail(struct perf_env *env) +{ + if (env->nr_cpus_avail == 0) + env->nr_cpus_avail = cpu__max_present_cpu(); + + return env->nr_cpus_avail ? 0 : -ENOENT; +} + +const char *perf_env__raw_arch(struct perf_env *env) +{ + return env && !perf_env__read_arch(env) ? env->arch : "unknown"; +} + +int perf_env__nr_cpus_avail(struct perf_env *env) +{ + return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0; +} + void cpu_cache_level__free(struct cpu_cache_level *cache) { free(cache->type); diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index c4ef2e523367..d01b8355f4ca 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -63,6 +63,7 @@ struct perf_env { struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; + u64 clockid_res_ns; }; extern struct perf_env perf_env; @@ -76,4 +77,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__raw_arch(struct perf_env *env); +int perf_env__nr_cpus_avail(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 98ff3a6a3d50..e9c108a6b1c3 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -88,10 +88,10 @@ static const char *perf_ns__name(unsigned int id) return perf_ns__names[id]; } -static int perf_tool__process_synth_event(struct perf_tool *tool, - union perf_event *event, - struct machine *machine, - perf_event__handler_t process) +int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process) { struct perf_sample synth_sample = { .pid = -1, @@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool, event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; + event->fork.header.misc = PERF_RECORD_MISC_FORK_EXEC; event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size); @@ -464,8 +465,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, { int rc = 0; struct map *pos; - struct map_groups *kmaps = &machine->kmaps; - struct maps *maps = &kmaps->maps[MAP__FUNCTION]; + struct maps *maps = machine__kernel_maps(machine); union perf_event *event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); if (event == NULL) { @@ -488,7 +488,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool, for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - if (__map__is_kernel(pos)) + if (!__map__is_kmodule(pos)) continue; size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64)); @@ -542,10 +542,17 @@ static int __event__synthesize_thread(union perf_event *comm_event, tgid, process, machine) < 0) return -1; + /* + * send mmap only for thread group leader + * see thread__init_map_groups + */ + if (pid == tgid && + perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, + process, machine, mmap_data, + proc_map_timeout)) + return -1; - return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data, - proc_map_timeout); + return 0; } if (machine__is_default_guest(machine)) @@ -869,7 +876,7 @@ static int find_symbol_cb(void *arg, const char *name, char type, * Must be a function or at least an alias, as in PARISC64, where "_text" is * an 'A' to the same address as "_stext". */ - if (!(symbol_type__is_a(type, MAP__FUNCTION) || + if (!(kallsyms__is_function(type) || type == 'A') || strcmp(name, args->name)) return 0; @@ -889,9 +896,16 @@ int kallsyms__get_function_start(const char *kallsyms_filename, return 0; } -int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, - perf_event__handler_t process, - struct machine *machine) +int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused, + perf_event__handler_t process __maybe_unused, + struct machine *machine __maybe_unused) +{ + return 0; +} + +static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) { size_t size; struct map *map = machine__kernel_map(machine); @@ -944,6 +958,19 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, return err; } +int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine) +{ + int err; + + err = __perf_event__synthesize_kernel_mmap(tool, process, machine); + if (err < 0) + return err; + + return perf_event__synthesize_extra_kmaps(tool, process, machine); +} + int perf_event__synthesize_thread_map2(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, @@ -1055,6 +1082,7 @@ void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max } *size += sizeof(struct cpu_map_data); + *size = PERF_ALIGN(*size, sizeof(u64)); return zalloc(*size); } @@ -1489,9 +1517,8 @@ int perf_event__process(struct perf_tool *tool __maybe_unused, return machine__process_event(machine, event, sample); } -void thread__find_addr_map(struct thread *thread, u8 cpumode, - enum map_type type, u64 addr, - struct addr_location *al) +struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al) { struct map_groups *mg = thread->mg; struct machine *machine = mg->machine; @@ -1505,7 +1532,7 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode, if (machine == NULL) { al->map = NULL; - return; + return NULL; } if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) { @@ -1533,28 +1560,11 @@ void thread__find_addr_map(struct thread *thread, u8 cpumode, !perf_host) al->filtered |= (1 << HIST_FILTER__HOST); - return; + return NULL; } -try_again: - al->map = map_groups__find(mg, type, al->addr); - if (al->map == NULL) { - /* - * If this is outside of all known maps, and is a negative - * address, try to look it up in the kernel dso, as it might be - * a vsyscall or vdso (which executes in user-mode). - * - * XXX This is nasty, we should have a symbol list in the - * "[vdso]" dso, but for now lets use the old trick of looking - * in the whole kernel symbol list. - */ - if (cpumode == PERF_RECORD_MISC_USER && machine && - mg != &machine->kmaps && - machine__kernel_ip(machine, al->addr)) { - mg = &machine->kmaps; - load_map = true; - goto try_again; - } - } else { + + al->map = map_groups__find(mg, al->addr); + if (al->map != NULL) { /* * Kernel maps might be changed when loading symbols so loading * must be done prior to using kernel maps. @@ -1563,17 +1573,17 @@ try_again: map__load(al->map); al->addr = al->map->map_ip(al->map, al->addr); } + + return al->map; } -void thread__find_addr_location(struct thread *thread, - u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al) +struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al) { - thread__find_addr_map(thread, cpumode, type, addr, al); - if (al->map != NULL) + al->sym = NULL; + if (thread__find_map(thread, cpumode, addr, al)) al->sym = map__find_symbol(al->map, al->addr); - else - al->sym = NULL; + return al->sym; } /* @@ -1590,7 +1600,7 @@ int machine__resolve(struct machine *machine, struct addr_location *al, return -1; dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); + thread__find_map(thread, sample->cpumode, sample->ip, al); dump_printf(" ...... dso: %s\n", al->map ? al->map->dso->long_name : al->level == 'H' ? "[hypervisor]" : "<not found>"); @@ -1669,10 +1679,7 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr) void thread__resolve(struct thread *thread, struct addr_location *al, struct perf_sample *sample) { - thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al); - if (!al->map) - thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE, - sample->addr, al); + thread__find_map(thread, sample->cpumode, sample->addr, al); al->cpu = sample->cpu; al->sym = NULL; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 0f794744919c..bfa60bcafbde 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -750,6 +750,10 @@ int perf_event__process_exit(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_tool__process_synth_event(struct perf_tool *tool, + union perf_event *event, + struct machine *machine, + perf_event__handler_t process); int perf_event__process(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -796,6 +800,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, bool mmap_data, unsigned int proc_map_timeout); +int perf_event__synthesize_extra_kmaps(struct perf_tool *tool, + perf_event__handler_t process, + struct machine *machine); + size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a59281d64368..668d2a9ef0f4 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -358,7 +358,7 @@ void perf_evlist__disable(struct perf_evlist *evlist) struct perf_evsel *pos; evlist__for_each_entry(evlist, pos) { - if (!perf_evsel__is_group_leader(pos) || !pos->fd) + if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->fd) continue; perf_evsel__disable(pos); } @@ -803,7 +803,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, if (*output == -1) { *output = fd; - if (perf_mmap__mmap(&maps[idx], mp, *output) < 0) + if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0) return -1; } else { if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0) @@ -1795,3 +1795,45 @@ bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) return true; } + +/* + * Events in data file are not collect in groups, but we still want + * the group display. Set the artificial group and set the leader's + * forced_leader flag to notify the display code. + */ +void perf_evlist__force_leader(struct perf_evlist *evlist) +{ + if (!evlist->nr_groups) { + struct perf_evsel *leader = perf_evlist__first(evlist); + + perf_evlist__set_leader(evlist); + leader->forced_leader = true; + } +} + +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, + struct perf_evsel *evsel) +{ + struct perf_evsel *c2, *leader; + bool is_open = true; + + leader = evsel->leader; + pr_debug("Weak group for %s/%d failed\n", + leader->name, leader->nr_members); + + /* + * for_each_group_member doesn't work here because it doesn't + * include the first entry. + */ + evlist__for_each_entry(evsel_list, c2) { + if (c2 == evsel) + is_open = false; + if (c2->leader == leader) { + if (is_open) + perf_evsel__close(c2); + c2->leader = c2; + c2->nr_members = 0; + } + } + return leader; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 6c41b2f78713..9919eed6d15b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -309,4 +309,10 @@ struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event); bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); + +void perf_evlist__force_leader(struct perf_evlist *evlist); + +struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, + struct perf_evsel *evsel); + #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4cd2cf93f726..dbc0466db368 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -232,6 +232,7 @@ void perf_evsel__init(struct perf_evsel *evsel, evsel->leader = evsel; evsel->unit = ""; evsel->scale = 1.0; + evsel->max_events = ULONG_MAX; evsel->evlist = NULL; evsel->bpf_fd = -1; INIT_LIST_HEAD(&evsel->node); @@ -251,8 +252,9 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) { struct perf_evsel *evsel = zalloc(perf_evsel__object.size); - if (evsel != NULL) - perf_evsel__init(evsel, attr, idx); + if (!evsel) + return NULL; + perf_evsel__init(evsel, attr, idx); if (perf_evsel__is_bpf_output(evsel)) { evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | @@ -260,6 +262,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) evsel->attr.sample_period = 1; } + if (perf_evsel__is_clock(evsel)) { + /* + * The evsel->unit points to static alias->unit + * so it's ok to use static string in here. + */ + static const char *unit = "msec"; + + evsel->unit = unit; + evsel->scale = 1e-6; + } + return evsel; } @@ -781,6 +794,9 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_MAX_STACK: max_stack = term->val.max_stack; break; + case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS: + evsel->max_events = term->val.max_events; + break; case PERF_EVSEL__CONFIG_TERM_INHERIT: /* * attr->inherit should has already been set by @@ -848,6 +864,12 @@ static void apply_config_terms(struct perf_evsel *evsel, } } +static bool is_dummy_event(struct perf_evsel *evsel) +{ + return (evsel->attr.type == PERF_TYPE_SOFTWARE) && + (evsel->attr.config == PERF_COUNT_SW_DUMMY); +} + /* * The enable_on_exec/disabled value strategy: * @@ -934,7 +956,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->sample_freq = 0; attr->sample_period = 0; attr->write_backward = 0; - attr->sample_id_all = 0; } if (opts->no_samples) @@ -1071,6 +1092,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->exclude_user = 1; } + if (evsel->own_cpus || evsel->unit) + evsel->attr.read_format |= PERF_FORMAT_ID; + /* * Apply event specific term settings, * it overloads any global configuration. @@ -1086,6 +1110,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, else perf_evsel__reset_sample_bit(evsel, PERIOD); } + + /* + * For initial_delay, a dummy event is added implicitly. + * The software event will trigger -EOPNOTSUPP error out, + * if BRANCH_STACK bit is set. + */ + if (opts->initial_delay && is_dummy_event(evsel)) + perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -1174,16 +1206,27 @@ int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter) int perf_evsel__enable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_ENABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0); + + if (!err) + evsel->disabled = false; + + return err; } int perf_evsel__disable(struct perf_evsel *evsel) { - return perf_evsel__run_ioctl(evsel, - PERF_EVENT_IOC_DISABLE, - 0); + int err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0); + /* + * We mark it disabled here so that tools that disable a event can + * ignore events after they disable it. I.e. the ring buffer may have + * already a few more events queued up before the kernel got the stop + * request. + */ + if (!err) + evsel->disabled = true; + + return err; } int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -2197,7 +2240,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } } - if (type & PERF_SAMPLE_CALLCHAIN) { + if (evsel__has_callchain(evsel)) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); OVERFLOW_CHECK_u64(array); @@ -2656,15 +2699,15 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, return 0; } -struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name) +struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name) { - return pevent_find_field(evsel->tp_format, name); + return tep_find_field(evsel->tp_format, name); } void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { - struct format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = perf_evsel__field(evsel, name); int offset; if (!field) @@ -2672,7 +2715,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, offset = field->offset; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(sample->raw_data + field->offset); offset &= 0xffff; } @@ -2680,7 +2723,7 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample, return sample->raw_data + offset; } -u64 format_field__intval(struct format_field *field, struct perf_sample *sample, +u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap) { u64 value; @@ -2722,7 +2765,7 @@ u64 format_field__intval(struct format_field *field, struct perf_sample *sample, u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, const char *name) { - struct format_field *field = perf_evsel__field(evsel, name); + struct tep_format_field *field = perf_evsel__field(evsel, name); if (!field) return 0; @@ -2857,12 +2900,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "Hint: Try again after reducing the number of events.\n" "Hint: Try increasing the limit with 'ulimit -n <limit>'"); case ENOMEM: - if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0 && + if (evsel__has_callchain(evsel) && access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0) return scnprintf(msg, size, "Not enough memory to setup event with callchain.\n" "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n" - "Hint: Current value: %d", sysctl_perf_event_max_stack); + "Hint: Current value: %d", sysctl__max_stack()); break; case ENODEV: if (target->cpu_list) @@ -2914,3 +2957,32 @@ struct perf_env *perf_evsel__env(struct perf_evsel *evsel) return evsel->evlist->env; return NULL; } + +static int store_evsel_ids(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + int cpu, thread; + + for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { + for (thread = 0; thread < xyarray__max_y(evsel->fd); + thread++) { + int fd = FD(evsel, cpu, thread); + + if (perf_evlist__id_add_fd(evlist, evsel, + cpu, thread, fd) < 0) + return -1; + } + } + + return 0; +} + +int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist) +{ + struct cpu_map *cpus = evsel->cpus; + struct thread_map *threads = evsel->threads; + + if (perf_evsel__alloc_id(evsel, cpus->nr, threads->nr)) + return -ENOMEM; + + return store_evsel_ids(evsel, evlist); +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index b13f5f234c8f..3147ca76c6fc 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -46,6 +46,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_STACK_USER, PERF_EVSEL__CONFIG_TERM_INHERIT, PERF_EVSEL__CONFIG_TERM_MAX_STACK, + PERF_EVSEL__CONFIG_TERM_MAX_EVENTS, PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, @@ -65,6 +66,7 @@ struct perf_evsel_config_term { bool inherit; bool overwrite; char *branch; + unsigned long max_events; } val; bool weak; }; @@ -99,10 +101,12 @@ struct perf_evsel { struct perf_counts *prev_raw_counts; int idx; u32 ids; + unsigned long max_events; + unsigned long nr_events_printed; char *name; double scale; const char *unit; - struct event_format *tp_format; + struct tep_event_format *tp_format; off_t id_offset; struct perf_stat_evsel *stats; void *priv; @@ -119,6 +123,7 @@ struct perf_evsel { bool snapshot; bool supported; bool needs_swap; + bool disabled; bool no_aux_samples; bool immediate; bool system_wide; @@ -211,7 +216,7 @@ static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char * struct perf_evsel *perf_evsel__new_cycles(bool precise); -struct event_format *event_format__new(const char *sys, const char *name); +struct tep_event_format *event_format__new(const char *sys, const char *name); void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx); @@ -296,11 +301,11 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel, return perf_evsel__rawptr(evsel, sample, name); } -struct format_field; +struct tep_format_field; -u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap); +u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap); -struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); +struct tep_format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name); #define perf_evsel__match(evsel, t, c) \ (evsel->attr.type == PERF_TYPE_##t && \ @@ -402,10 +407,13 @@ bool perf_evsel__is_function_event(struct perf_evsel *evsel); static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel) { - struct perf_event_attr *attr = &evsel->attr; + return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT); +} - return (attr->config == PERF_COUNT_SW_BPF_OUTPUT) && - (attr->type == PERF_TYPE_SOFTWARE); +static inline bool perf_evsel__is_clock(struct perf_evsel *evsel) +{ + return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) || + perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK); } struct perf_attr_details { @@ -449,16 +457,28 @@ static inline int perf_evsel__group_idx(struct perf_evsel *evsel) return evsel->idx - evsel->leader->idx; } +/* Iterates group WITHOUT the leader. */ #define for_each_group_member(_evsel, _leader) \ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) +/* Iterates group WITH the leader. */ +#define for_each_group_evsel(_evsel, _leader) \ +for ((_evsel) = _leader; \ + (_evsel) && (_evsel)->leader == (_leader); \ + (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) + static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel) { return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; } +static inline bool evsel__has_callchain(const struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0; +} + typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, @@ -466,4 +486,5 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, struct perf_env *perf_evsel__env(struct perf_evsel *evsel); +int perf_evsel__store_ids(struct perf_evsel *evsel, struct perf_evlist *evlist); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 06dfb027879d..0d0a4c6f368b 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -73,7 +73,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, } if (details->trace_fields) { - struct format_field *field; + struct tep_format_field *field; if (evsel->attr.type != PERF_TYPE_TRACEPOINT) { printed += comma_fprintf(fp, &first, " (not a tracepoint)"); diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c540d47583e7..aafbe54fd3fa 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -114,7 +114,7 @@ gen_build_id(struct buildid_note *note, fd = open("/dev/urandom", O_RDONLY); if (fd == -1) - err(1, "cannot access /dev/urandom for builid"); + err(1, "cannot access /dev/urandom for buildid"); sret = read(fd, note->build_id, sz); diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index de322d51c7fe..b72440bf9a79 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -29,6 +29,12 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent #elif defined(__powerpc__) #define GEN_ELF_ARCH EM_PPC #define GEN_ELF_CLASS ELFCLASS32 +#elif defined(__sparc__) && defined(__arch64__) +#define GEN_ELF_ARCH EM_SPARCV9 +#define GEN_ELF_CLASS ELFCLASS64 +#elif defined(__sparc__) +#define GEN_ELF_ARCH EM_SPARC +#define GEN_ELF_CLASS ELFCLASS32 #else #error "unsupported architecture" #endif diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c new file mode 100644 index 000000000000..267aa609a582 --- /dev/null +++ b/tools/perf/util/get_current_dir_name.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +// +#ifndef HAVE_GET_CURRENT_DIR_NAME +#include "util.h" +#include <unistd.h> +#include <stdlib.h> +#include <stdlib.h> + +/* Android's 'bionic' library, for one, doesn't have this */ + +char *get_current_dir_name(void) +{ + char pwd[PATH_MAX]; + + return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd); +} +#endif // HAVE_GET_CURRENT_DIR_NAME diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a8bff2178fbc..4fd45be95a43 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -279,8 +279,6 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) if (!set) return -ENOMEM; - bitmap_zero(set, size); - p = (u64 *) set; for (i = 0; (u64) i < BITS_TO_U64(size); i++) { @@ -1036,6 +1034,13 @@ static int write_auxtrace(struct feat_fd *ff, return err; } +static int write_clockid(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + return do_write(ff, &ff->ph->env.clockid_res_ns, + sizeof(ff->ph->env.clockid_res_ns)); +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1285,7 +1290,6 @@ static int memory_node__read(struct memory_node *n, unsigned long idx) return -ENOMEM; } - bitmap_zero(n->set, size); n->node = idx; n->size = size; @@ -1459,8 +1463,24 @@ static void print_cmdline(struct feat_fd *ff, FILE *fp) fprintf(fp, "# cmdline : "); - for (i = 0; i < nr; i++) - fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]); + for (i = 0; i < nr; i++) { + char *argv_i = strdup(ff->ph->env.cmdline_argv[i]); + if (!argv_i) { + fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]); + } else { + char *mem = argv_i; + do { + char *quote = strchr(argv_i, '\''); + if (!quote) + break; + *quote++ = '\0'; + fprintf(fp, "%s\\\'", argv_i); + argv_i = quote; + } while (1); + fprintf(fp, "%s ", argv_i); + free(mem); + } + } fputc('\n', fp); } @@ -1495,6 +1515,12 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp) fprintf(fp, "# Core ID and Socket ID information is not available\n"); } +static void print_clockid(struct feat_fd *ff, FILE *fp) +{ + fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n", + ff->ph->env.clockid_res_ns * 1000); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2113,6 +2139,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) int cpu_nr = ff->ph->env.nr_cpus_avail; u64 size = 0; struct perf_header *ph = ff->ph; + bool do_core_id_test = true; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); if (!ph->env.cpu) @@ -2167,6 +2194,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) return 0; } + /* On s390 the socket_id number is not related to the numbers of cpus. + * The socket_id number might be higher than the numbers of cpus. + * This depends on the configuration. + */ + if (ph->env.arch && !strncmp(ph->env.arch, "s390", 4)) + do_core_id_test = false; + for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; @@ -2176,7 +2210,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) goto free_cpu; - if (nr != (u32)-1 && nr > (u32)cpu_nr) { + if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { pr_debug("socket_id number is too big." "You may need to upgrade the perf tool.\n"); goto free_cpu; @@ -2510,6 +2544,15 @@ out: return ret; } +static int process_clockid(struct feat_fd *ff, + void *data __maybe_unused) +{ + if (do_read_u64(ff, &ff->ph->env.clockid_res_ns)) + return -1; + + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2563,12 +2606,13 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(NUMA_TOPOLOGY, numa_topology, true), FEAT_OPN(BRANCH_STACK, branch_stack, false), FEAT_OPR(PMU_MAPPINGS, pmu_mappings, false), - FEAT_OPN(GROUP_DESC, group_desc, false), + FEAT_OPR(GROUP_DESC, group_desc, false), FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), + FEAT_OPR(CLOCKID, clockid, false) }; struct header_print_data { @@ -3183,9 +3227,9 @@ static int read_attr(int fd, struct perf_header *ph, } static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, - struct pevent *pevent) + struct tep_handle *pevent) { - struct event_format *event; + struct tep_event_format *event; char bf[128]; /* already prepared */ @@ -3197,7 +3241,7 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, return -1; } - event = pevent_find_event(pevent, evsel->attr.config); + event = tep_find_event(pevent, evsel->attr.config); if (event == NULL) { pr_debug("cannot find event format for %d\n", (int)evsel->attr.config); return -1; @@ -3215,7 +3259,7 @@ static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel, } static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist, - struct pevent *pevent) + struct tep_handle *pevent) { struct perf_evsel *pos; @@ -3312,8 +3356,6 @@ int perf_session__read_header(struct perf_session *session) lseek(fd, tmp, SEEK_SET); } - symbol_conf.nr_events = nr_attrs; - perf_header__process_sections(header, fd, &session->tevent, perf_file_section__process); @@ -3429,10 +3471,10 @@ int perf_event__synthesize_features(struct perf_tool *tool, return ret; } -int perf_event__process_feature(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session __maybe_unused) +int perf_event__process_feature(struct perf_session *session, + union perf_event *event) { + struct perf_tool *tool = session->tool; struct feat_fd ff = { .fd = 0 }; struct feature_event *fe = (struct feature_event *)event; int type = fe->header.type; @@ -3442,7 +3484,7 @@ int perf_event__process_feature(struct perf_tool *tool, pr_warning("invalid record type %d in pipe-mode\n", type); return 0; } - if (feat == HEADER_RESERVED || feat > HEADER_LAST_FEATURE) { + if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) { pr_warning("invalid record type %d in pipe-mode\n", type); return -1; } @@ -3618,13 +3660,13 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp) } int perf_event__synthesize_attrs(struct perf_tool *tool, - struct perf_session *session, - perf_event__handler_t process) + struct perf_evlist *evlist, + perf_event__handler_t process) { struct perf_evsel *evsel; int err = 0; - evlist__for_each_entry(session->evlist, evsel) { + evlist__for_each_entry(evlist, evsel) { err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids, evsel->id, process); if (err) { @@ -3739,8 +3781,6 @@ int perf_event__process_attr(struct perf_tool *tool __maybe_unused, perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]); } - symbol_conf.nr_events = evlist->nr_entries; - return 0; } @@ -3839,9 +3879,8 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, return aligned_size; } -int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_tracing_data(struct perf_session *session, + union perf_event *event) { ssize_t size_read, padding, size = event->tracing_data.size; int fd = perf_data__fd(session->data); @@ -3907,9 +3946,8 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, return err; } -int perf_event__process_build_id(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_build_id(struct perf_session *session, + union perf_event *event) { __event_process_build_id(&event->build_id, event->build_id.filename, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 90d4577a92dc..0d553ddca0a3 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -2,6 +2,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H +#include <linux/stddef.h> #include <linux/perf_event.h> #include <sys/types.h> #include <stdbool.h> @@ -37,6 +38,7 @@ enum { HEADER_CACHE, HEADER_SAMPLE_TIME, HEADER_MEM_TOPOLOGY, + HEADER_CLOCKID, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -115,15 +117,14 @@ int perf_event__synthesize_extra_attr(struct perf_tool *tool, perf_event__handler_t process, bool is_pipe); -int perf_event__process_feature(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_feature(struct perf_session *session, + union perf_event *event); int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *attr, u32 ids, u64 *id, perf_event__handler_t process); int perf_event__synthesize_attrs(struct perf_tool *tool, - struct perf_session *session, + struct perf_evlist *evlist, perf_event__handler_t process); int perf_event__synthesize_event_update_unit(struct perf_tool *tool, struct perf_evsel *evsel, @@ -147,17 +148,15 @@ size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct perf_evlist *evlist, perf_event__handler_t process); -int perf_event__process_tracing_data(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_tracing_data(struct perf_session *session, + union perf_event *event); int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc, perf_event__handler_t process, struct machine *machine); -int perf_event__process_build_id(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_build_id(struct perf_session *session, + union perf_event *event); bool is_perf_magic(u64 magic); #define NAME_ALIGN 64 diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 4d602fba40b2..828cb9794c76 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -370,9 +370,11 @@ void hists__delete_entries(struct hists *hists) static int hist_entry__init(struct hist_entry *he, struct hist_entry *template, - bool sample_self) + bool sample_self, + size_t callchain_size) { *he = *template; + he->callchain_size = callchain_size; if (symbol_conf.cumulate_callchain) { he->stat_acc = malloc(sizeof(he->stat)); @@ -410,7 +412,7 @@ static int hist_entry__init(struct hist_entry *he, map__get(he->mem_info->daddr.map); } - if (symbol_conf.use_callchain) + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) callchain_init(he->callchain); if (he->raw_data) { @@ -473,7 +475,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, he = ops->new(callchain_size); if (he) { - err = hist_entry__init(he, template, sample_self); + err = hist_entry__init(he, template, sample_self, callchain_size); if (err) { ops->free(he); he = NULL; @@ -492,7 +494,7 @@ static u8 symbol__parent_filter(const struct symbol *parent) static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period) { - if (!symbol_conf.use_callchain) + if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain) return; he->hists->callchain_period += period; @@ -619,9 +621,11 @@ __hists__add_entry(struct hists *hists, .raw_data = sample->raw_data, .raw_size = sample->raw_size, .ops = ops, - }; + }, *he = hists__findnew_entry(hists, &entry, al, sample_self); - return hists__findnew_entry(hists, &entry, al, sample_self); + if (!hists->has_callchains && he && he->callchain_size != 0) + hists->has_callchains = true; + return he; } struct hist_entry *hists__add_entry(struct hists *hists, @@ -986,7 +990,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, iter->he = he; he_cache[iter->curr++] = he; - if (symbol_conf.use_callchain) + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) callchain_append(he->callchain, &cursor, sample->period); return 0; } @@ -1039,7 +1043,7 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, int err, err2; struct map *alm = NULL; - if (al && al->map) + if (al) alm = map__get(al->map); err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, @@ -1373,7 +1377,8 @@ static int hists__hierarchy_insert_entry(struct hists *hists, if (new_he) { new_he->leaf = true; - if (symbol_conf.use_callchain) { + if (hist_entry__has_callchains(new_he) && + symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); if (callchain_merge(&callchain_cursor, new_he->callchain, @@ -1414,7 +1419,7 @@ static int hists__collapse_insert_entry(struct hists *hists, if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); - if (symbol_conf.use_callchain) { + if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { callchain_cursor_reset(&callchain_cursor); if (callchain_merge(&callchain_cursor, iter->callchain, @@ -1757,7 +1762,7 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro bool use_callchain; if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph) - use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; + use_callchain = evsel__has_callchain(evsel); else use_callchain = symbol_conf.use_callchain; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index fbabfd8a215d..3badd7f1e1b8 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -85,6 +85,7 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; + bool has_callchains; int socket_filter; struct perf_hpp_list *hpp_list; struct list_head hpp_formats; @@ -180,7 +181,7 @@ size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, int max_cols, float min_pcnt, FILE *fp, - bool use_callchain); + bool ignore_callchains); size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp); void hists__filter_by_dso(struct hists *hists); @@ -220,6 +221,11 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) return &hevsel->hists; } +static __pure inline bool hists__has_callchains(struct hists *hists) +{ + return hists->has_callchains; +} + int hists__init(void); int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); @@ -419,19 +425,24 @@ struct hist_browser_timer { int refresh; }; +struct annotation_options; + #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, + struct annotation_options *annotation_opts); int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, + struct annotation_options *annotation_opts); int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, - bool warn_lost_event); + bool warn_lost_event, + struct annotation_options *annotation_options); int script_browse(const char *script_opt); #else static inline @@ -440,20 +451,23 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, struct hist_browser_timer *hbt __maybe_unused, float min_pcnt __maybe_unused, struct perf_env *env __maybe_unused, - bool warn_lost_event __maybe_unused) + bool warn_lost_event __maybe_unused, + struct annotation_options *annotation_options __maybe_unused) { return 0; } static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + struct annotation_options *annotation_options __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + struct annotation_options *annotation_opts __maybe_unused) { return 0; } diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 72db2744876d..7b27d77306c2 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -269,6 +269,13 @@ static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, return 0; } +static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip) +{ + return machine__kernel_ip(bts->machine, ip) ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, struct branch *branch) { @@ -281,12 +288,8 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, bts->num_events++ <= bts->synth_opts.initial_skip) return 0; - event.sample.header.type = PERF_RECORD_SAMPLE; - event.sample.header.misc = PERF_RECORD_MISC_USER; - event.sample.header.size = sizeof(struct perf_event_header); - - sample.cpumode = PERF_RECORD_MISC_USER; sample.ip = le64_to_cpu(branch->from); + sample.cpumode = intel_bts_cpumode(bts, sample.ip); sample.pid = btsq->pid; sample.tid = btsq->tid; sample.addr = le64_to_cpu(branch->to); @@ -298,6 +301,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, sample.insn_len = btsq->intel_pt_insn.length; memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ); + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + if (bts->synth_opts.inject) { event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, @@ -335,8 +342,7 @@ static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) if (!thread) return -1; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso) goto out_put; len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, @@ -911,7 +917,8 @@ int intel_bts_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { bts->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&bts->synth_opts); + itrace_synth_opts__set_default(&bts->synth_opts, + session->itrace_synth_opts->default_no_sample); if (session->itrace_synth_opts) bts->synth_opts.thread_stack = session->itrace_synth_opts->thread_stack; diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h index e23578c7b1be..2669c9f748e4 100644 --- a/tools/perf/util/intel-pt-decoder/insn.h +++ b/tools/perf/util/intel-pt-decoder/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index f9157aed1289..4503f3ca45ab 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -113,6 +113,7 @@ struct intel_pt_decoder { bool have_cyc; bool fixup_last_mtc; bool have_last_ip; + enum intel_pt_param_flags flags; uint64_t pos; uint64_t last_ip; uint64_t ip; @@ -226,6 +227,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->return_compression = params->return_compression; decoder->branch_enable = params->branch_enable; + decoder->flags = params->flags; + decoder->period = params->period; decoder->period_type = params->period_type; @@ -1097,6 +1100,15 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) return ret; } +static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder, + struct intel_pt_insn *intel_pt_insn, + uint64_t ip, int err) +{ + return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err && + intel_pt_insn->branch == INTEL_PT_BR_INDIRECT && + ip == decoder->ip + intel_pt_insn->length; +} + static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) { struct intel_pt_insn intel_pt_insn; @@ -1109,10 +1121,11 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); if (err == INTEL_PT_RETURN) return 0; - if (err == -EAGAIN) { + if (err == -EAGAIN || + intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) { if (intel_pt_fup_event(decoder)) return 0; - return err; + return -EAGAIN; } decoder->set_fup_tx_flags = false; if (err) @@ -1152,7 +1165,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->pge = false; decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; } if (err == INTEL_PT_RETURN) @@ -1166,9 +1179,13 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->continuous_period = false; decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - if (decoder->packet.count != 0) + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { + decoder->state.to_ip = decoder->last_ip; decoder->ip = decoder->last_ip; + } + decoder->state.type |= INTEL_PT_TRACE_END; } else { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->state.from_ip = decoder->ip; @@ -1195,7 +1212,8 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->ip = to_ip; decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.to_ip = to_ip; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; } intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", @@ -1376,7 +1394,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) { intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); - decoder->have_tma = false; decoder->cbr = 0; decoder->timestamp_insn_cnt = 0; decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; @@ -1457,6 +1474,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) decoder->have_calc_cyc_to_tsc = false; intel_pt_calc_cyc_to_tsc(decoder, true); } + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) @@ -1497,6 +1516,8 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) decoder->timestamp = timestamp; decoder->timestamp_insn_cnt = 0; + + intel_pt_log_to("Setting timestamp", decoder->timestamp); } /* Walk PSB+ packets when already in sync. */ @@ -1604,7 +1625,6 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) case INTEL_PT_PSB: case INTEL_PT_TSC: case INTEL_PT_TMA: - case INTEL_PT_CBR: case INTEL_PT_MODE_TSX: case INTEL_PT_BAD: case INTEL_PT_PSBEND: @@ -1620,19 +1640,24 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) decoder->pkt_step = 0; return -ENOENT; + case INTEL_PT_CBR: + intel_pt_calc_cbr(decoder); + break; + case INTEL_PT_OVF: return intel_pt_overflow(decoder); case INTEL_PT_TIP_PGD: decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; - if (decoder->packet.count != 0) { + if (decoder->packet.count == 0) { + decoder->state.to_ip = 0; + } else { intel_pt_set_ip(decoder); - intel_pt_log("Omitting PGD ip " x64_fmt "\n", - decoder->ip); + decoder->state.to_ip = decoder->ip; } decoder->pge = false; decoder->continuous_period = false; + decoder->state.type |= INTEL_PT_TRACE_END; return 0; case INTEL_PT_TIP_PGE: @@ -1646,6 +1671,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) intel_pt_set_ip(decoder); decoder->state.to_ip = decoder->ip; } + decoder->state.type |= INTEL_PT_TRACE_BEGIN; return 0; case INTEL_PT_TIP: @@ -1724,6 +1750,7 @@ next: intel_pt_set_ip(decoder); decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; + decoder->state.type |= INTEL_PT_TRACE_BEGIN; return 0; } @@ -2062,9 +2089,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); - if (decoder->ip) - return 0; - break; + if (!decoder->ip) + break; + if (decoder->packet.type == INTEL_PT_TIP_PGE) + decoder->state.type |= INTEL_PT_TRACE_BEGIN; + if (decoder->packet.type == INTEL_PT_TIP_PGD) + decoder->state.type |= INTEL_PT_TRACE_END; + return 0; case INTEL_PT_FUP: if (intel_pt_have_ip(decoder)) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index fc1752d50019..ed088d4726ba 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -37,6 +37,8 @@ enum intel_pt_sample_type { INTEL_PT_EX_STOP = 1 << 6, INTEL_PT_PWR_EXIT = 1 << 7, INTEL_PT_CBR_CHG = 1 << 8, + INTEL_PT_TRACE_BEGIN = 1 << 9, + INTEL_PT_TRACE_END = 1 << 10, }; enum intel_pt_period_type { @@ -60,6 +62,14 @@ enum { INTEL_PT_ERR_MAX, }; +enum intel_pt_param_flags { + /* + * FUP packet can contain next linear instruction pointer instead of + * current linear instruction pointer. + */ + INTEL_PT_FUP_WITH_NLIP = 1 << 0, +}; + struct intel_pt_state { enum intel_pt_sample_type type; int err; @@ -106,6 +116,7 @@ struct intel_pt_params { unsigned int mtc_period; uint32_t tsc_ctc_ratio_n; uint32_t tsc_ctc_ratio_d; + enum intel_pt_param_flags flags; }; struct intel_pt_decoder; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c index e02bc7b166a0..5e64da270f97 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c @@ -31,6 +31,11 @@ static FILE *f; static char log_name[MAX_LOG_NAME]; bool intel_pt_enable_logging; +void *intel_pt_log_fp(void) +{ + return f; +} + void intel_pt_log_enable(void) { intel_pt_enable_logging = true; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h index 45b64f93f358..cc084937f701 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-log.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h @@ -22,6 +22,7 @@ struct intel_pt_pkt; +void *intel_pt_log_fp(void); void intel_pt_log_enable(void); void intel_pt_log_disable(void); void intel_pt_log_set_name(const char *name); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index ba4c9dd18643..d426761a549d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -366,7 +366,7 @@ static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, if (len < offs) return INTEL_PT_NEED_MORE_BYTES; byte = buf[offs++]; - payload |= (byte >> 1) << shift; + payload |= ((uint64_t)byte >> 1) << shift; } packet->type = INTEL_PT_CYC; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 0effaff57020..149ff361ca78 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -206,6 +206,16 @@ static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, intel_pt_dump(pt, buf, len); } +static void intel_pt_log_event(union perf_event *event) +{ + FILE *f = intel_pt_log_fp(); + + if (!intel_pt_enable_logging || !f) + return; + + perf_event__fprintf(event, f); +} + static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, struct auxtrace_buffer *b) { @@ -407,6 +417,13 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) return auxtrace_cache__lookup(dso->auxtrace_cache, offset); } +static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip) +{ + return ip >= pt->kernel_start ? + PERF_RECORD_MISC_KERNEL : + PERF_RECORD_MISC_USER; +} + static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, uint64_t max_insn_cnt, @@ -429,10 +446,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, if (to_ip && *ip == to_ip) goto out_no_cache; - if (*ip >= ptq->pt->kernel_start) - cpumode = PERF_RECORD_MISC_KERNEL; - else - cpumode = PERF_RECORD_MISC_USER; + cpumode = intel_pt_cpumode(ptq->pt, *ip); thread = ptq->thread; if (!thread) { @@ -442,8 +456,7 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, } while (1) { - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso) return -EINVAL; if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && @@ -596,8 +609,7 @@ static int __intel_pt_pgd_ip(uint64_t ip, void *data) if (!thread) return -EINVAL; - thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); - if (!al.map || !al.map->dso) + if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso) return -EINVAL; offset = al.map->map_ip(al.map, ip); @@ -751,6 +763,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, unsigned int queue_nr) { struct intel_pt_params params = { .get_trace = 0, }; + struct perf_env *env = pt->machine->env; struct intel_pt_queue *ptq; ptq = zalloc(sizeof(struct intel_pt_queue)); @@ -760,7 +773,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, if (pt->synth_opts.callchain) { size_t sz = sizeof(struct ip_callchain); - sz += pt->synth_opts.callchain_sz * sizeof(u64); + /* Add 1 to callchain_sz for callchain context */ + sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64); ptq->chain = zalloc(sz); if (!ptq->chain) goto out_free; @@ -832,6 +846,9 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, } } + if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18)) + params.flags |= INTEL_PT_FUP_WITH_NLIP; + ptq->decoder = intel_pt_decoder_new(¶ms); if (!ptq->decoder) goto out_free; @@ -906,6 +923,11 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq) ptq->insn_len = ptq->state->insn_len; memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ); } + + if (ptq->state->type & INTEL_PT_TRACE_BEGIN) + ptq->flags |= PERF_IP_FLAG_TRACE_BEGIN; + if (ptq->state->type & INTEL_PT_TRACE_END) + ptq->flags |= PERF_IP_FLAG_TRACE_END; } static int intel_pt_setup_queue(struct intel_pt *pt, @@ -1051,15 +1073,11 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) { - event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = PERF_RECORD_MISC_USER; - event->sample.header.size = sizeof(struct perf_event_header); - if (!pt->timeless_decoding) sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); - sample->cpumode = PERF_RECORD_MISC_USER; sample->ip = ptq->state->from_ip; + sample->cpumode = intel_pt_cpumode(pt, sample->ip); sample->pid = ptq->pid; sample->tid = ptq->tid; sample->addr = ptq->state->to_ip; @@ -1068,6 +1086,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, sample->flags = ptq->flags; sample->insn_len = ptq->insn_len; memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ); + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = sample->cpumode; + event->sample.header.size = sizeof(struct perf_event_header); } static int intel_pt_inject_event(union perf_event *event, @@ -1153,7 +1175,8 @@ static void intel_pt_prep_sample(struct intel_pt *pt, if (pt->synth_opts.callchain) { thread_stack__sample(ptq->thread, ptq->chain, - pt->synth_opts.callchain_sz, sample->ip); + pt->synth_opts.callchain_sz + 1, + sample->ip, pt->kernel_start); sample->callchain = ptq->chain; } @@ -1523,6 +1546,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) if (intel_pt_is_switch_ip(ptq, state->to_ip)) { switch (ptq->switch_state) { + case INTEL_PT_SS_NOT_TRACING: case INTEL_PT_SS_UNKNOWN: case INTEL_PT_SS_EXPECTING_SWITCH_IP: err = intel_pt_next_tid(pt, ptq); @@ -1565,7 +1589,7 @@ static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip) if (map__load(map)) return 0; - start = dso__first_symbol(map->dso, MAP__FUNCTION); + start = dso__first_symbol(map->dso); for (sym = start; sym; sym = dso__next_symbol(sym)) { if (sym->binding == STB_GLOBAL && @@ -1996,9 +2020,9 @@ static int intel_pt_process_event(struct perf_session *session, event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) err = intel_pt_context_switch(pt, event, sample); - intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", - perf_event__name(event->header.type), event->header.type, - sample->cpu, sample->time, timestamp); + intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ", + event->header.type, sample->cpu, sample->time, timestamp); + intel_pt_log_event(event); return err; } @@ -2551,7 +2575,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event, if (session->itrace_synth_opts && session->itrace_synth_opts->set) { pt->synth_opts = *session->itrace_synth_opts; } else { - itrace_synth_opts__set_default(&pt->synth_opts); + itrace_synth_opts__set_default(&pt->synth_opts, + session->itrace_synth_opts->default_no_sample); if (use_browser != -1) { pt->synth_opts.branches = false; pt->synth_opts.callchain = true; diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 1cca0a2fa641..5b0b60f00275 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -14,19 +14,22 @@ #include "config.h" #include "util.h" #include <sys/wait.h> +#include <subcmd/exec-cmd.h> #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\ "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \ - "$CLANG_OPTIONS $KERNEL_INC_OPTIONS " \ + "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \ "-Wno-unused-value -Wno-pointer-sign " \ "-working-directory $WORKING_DIR " \ - "-c \"$CLANG_SOURCE\" -target bpf -O2 -o -" + "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE" struct llvm_param llvm_param = { .clang_path = "clang", + .llc_path = "llc", .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE, .clang_opt = NULL, + .opts = NULL, .kbuild_dir = NULL, .kbuild_opts = NULL, .user_set_param = false, @@ -50,6 +53,8 @@ int perf_llvm_config(const char *var, const char *value) llvm_param.kbuild_opts = strdup(value); else if (!strcmp(var, "dump-obj")) llvm_param.dump_obj = !!perf_config_bool(var, value); + else if (!strcmp(var, "opts")) + llvm_param.opts = strdup(value); else { pr_debug("Invalid LLVM config option: %s\n", value); return -1; @@ -212,7 +217,7 @@ version_notice(void) " \t\thttp://llvm.org/apt\n\n" " \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" " \toption in [llvm] section of ~/.perfconfig to:\n\n" -" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n" +" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n" " \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" " \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" " \t(Replace /path/to/llc with path to your llc)\n\n" @@ -265,16 +270,16 @@ static const char *kinc_fetch_script = "#!/usr/bin/env sh\n" "if ! test -d \"$KBUILD_DIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "TMPDIR=`mktemp -d`\n" "if test -z \"$TMPDIR\"\n" "then\n" -" exit -1\n" +" exit 1\n" "fi\n" "cat << EOF > $TMPDIR/Makefile\n" "obj-y := dummy.o\n" @@ -429,11 +434,15 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, unsigned int kernel_version; char linux_version_code_str[64]; const char *clang_opt = llvm_param.clang_opt; - char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; + char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64]; char serr[STRERR_BUFSIZE]; - char *kbuild_dir = NULL, *kbuild_include_opts = NULL; + char *kbuild_dir = NULL, *kbuild_include_opts = NULL, + *perf_bpf_include_opts = NULL; const char *template = llvm_param.clang_bpf_cmd_template; - char *command_echo, *command_out; + char *pipe_template = NULL; + const char *opts = llvm_param.opts; + char *command_echo = NULL, *command_out; + char *perf_include_dir = system_path(PERF_INCLUDE_DIR); if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -471,14 +480,36 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - + if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0) + goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", linux_version_code_str); force_set_env("CLANG_EXEC", clang_path); force_set_env("CLANG_OPTIONS", clang_opt); force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); + force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts); force_set_env("WORKING_DIR", kbuild_dir ? : "."); + if (opts) { + err = search_program(llvm_param.llc_path, "llc", llc_path); + if (err) { + pr_err("ERROR:\tunable to find llc.\n" + "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" + " \tand 'llc-path' option in [llvm] section of ~/.perfconfig.\n"); + version_notice(); + goto errout; + } + + if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -", + template, llc_path, opts) < 0) { + pr_err("ERROR:\tnot enough memory to setup command line\n"); + goto errout; + } + + template = pipe_template; + + } + /* * Since we may reset clang's working dir, path of source file * should be transferred into absolute path, except we want @@ -512,6 +543,8 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(command_out); free(kbuild_dir); free(kbuild_include_opts); + free(perf_bpf_include_opts); + free(perf_include_dir); if (!p_obj_buf) free(obj_buf); @@ -526,6 +559,9 @@ errout: free(kbuild_dir); free(kbuild_include_opts); free(obj_buf); + free(perf_bpf_include_opts); + free(perf_include_dir); + free(pipe_template); if (p_obj_buf) *p_obj_buf = NULL; if (p_obj_buf_sz) diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h index d3ad8deb5db4..bf3f3f4c4fe2 100644 --- a/tools/perf/util/llvm-utils.h +++ b/tools/perf/util/llvm-utils.h @@ -11,6 +11,8 @@ struct llvm_param { /* Path of clang executable */ const char *clang_path; + /* Path of llc executable */ + const char *llc_path; /* * Template of clang bpf compiling. 5 env variables * can be used: @@ -23,6 +25,13 @@ struct llvm_param { const char *clang_bpf_cmd_template; /* Will be filled in $CLANG_OPTIONS */ const char *clang_opt; + /* + * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe + * the clang output to llc, useful for new llvm options not + * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris + * in clang 6.0/llvm 7 + */ + const char *opts; /* Where to find kbuild system */ const char *kbuild_dir; /* diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c index 07498eaddc08..b1dd29a9d915 100644 --- a/tools/perf/util/lzma.c +++ b/tools/perf/util/lzma.c @@ -3,9 +3,13 @@ #include <lzma.h> #include <stdio.h> #include <linux/compiler.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> #include "compress.h" #include "util.h" #include "debug.h" +#include <unistd.h> #define BUFSIZE 8192 @@ -99,3 +103,19 @@ err_fclose: fclose(infile); return err; } + +bool lzma_is_compressed(const char *input) +{ + int fd = open(input, O_RDONLY); + const uint8_t magic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 }; + char buf[6] = { 0 }; + ssize_t rc; + + if (fd < 0) + return -1; + + rc = read(fd, buf, sizeof(buf)); + close(fd); + return rc == sizeof(buf) ? + memcmp(buf, magic, sizeof(buf)) == 0 : false; +} diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 32d50492505d..8f36ce813bc5 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -24,6 +24,7 @@ #include "sane_ctype.h" #include <symbol/kallsyms.h> +#include <linux/mman.h> static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); @@ -81,8 +82,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) machine->kptr_restrict_warned = false; machine->comm_exec = false; machine->kernel_start = 0; - - memset(machine->vmlinux_maps, 0, sizeof(machine->vmlinux_maps)); + machine->vmlinux_map = NULL; machine->root_dir = strdup(root_dir); if (machine->root_dir == NULL) @@ -137,13 +137,11 @@ struct machine *machine__new_kallsyms(void) struct machine *machine = machine__new_host(); /* * FIXME: - * 1) MAP__FUNCTION will go away when we stop loading separate maps for - * functions and data objects. - * 2) We should switch to machine__load_kallsyms(), i.e. not explicitely + * 1) We should switch to machine__load_kallsyms(), i.e. not explicitely * ask for not using the kcore parsing code, once this one is fixed * to create a map per module. */ - if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) { + if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) { machine__delete(machine); machine = NULL; } @@ -410,23 +408,16 @@ out_err: } /* - * Caller must eventually drop thread->refcnt returned with a successful - * lookup/new thread inserted. + * Front-end cache - TID lookups come in blocks, + * so most of the time we dont have to look up + * the full rbtree: */ -static struct thread *____machine__findnew_thread(struct machine *machine, - struct threads *threads, - pid_t pid, pid_t tid, - bool create) +static struct thread* +__threads__get_last_match(struct threads *threads, struct machine *machine, + int pid, int tid) { - struct rb_node **p = &threads->entries.rb_node; - struct rb_node *parent = NULL; struct thread *th; - /* - * Front-end cache - TID lookups come in blocks, - * so most of the time we dont have to look up - * the full rbtree: - */ th = threads->last_match; if (th != NULL) { if (th->tid == tid) { @@ -437,12 +428,57 @@ static struct thread *____machine__findnew_thread(struct machine *machine, threads->last_match = NULL; } + return NULL; +} + +static struct thread* +threads__get_last_match(struct threads *threads, struct machine *machine, + int pid, int tid) +{ + struct thread *th = NULL; + + if (perf_singlethreaded) + th = __threads__get_last_match(threads, machine, pid, tid); + + return th; +} + +static void +__threads__set_last_match(struct threads *threads, struct thread *th) +{ + threads->last_match = th; +} + +static void +threads__set_last_match(struct threads *threads, struct thread *th) +{ + if (perf_singlethreaded) + __threads__set_last_match(threads, th); +} + +/* + * Caller must eventually drop thread->refcnt returned with a successful + * lookup/new thread inserted. + */ +static struct thread *____machine__findnew_thread(struct machine *machine, + struct threads *threads, + pid_t pid, pid_t tid, + bool create) +{ + struct rb_node **p = &threads->entries.rb_node; + struct rb_node *parent = NULL; + struct thread *th; + + th = threads__get_last_match(threads, machine, pid, tid); + if (th) + return th; + while (*p != NULL) { parent = *p; th = rb_entry(parent, struct thread, rb_node); if (th->tid == tid) { - threads->last_match = th; + threads__set_last_match(threads, th); machine__update_thread_pid(machine, th, pid); return thread__get(th); } @@ -479,7 +515,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, * It is now in the rbtree, get a ref */ thread__get(th); - threads->last_match = th; + threads__set_last_match(threads, th); ++threads->nr; } @@ -673,8 +709,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, if (kmod_path__parse_name(&m, filename)) return NULL; - map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, - m.name); + map = map_groups__find_by_name(&machine->kmaps, m.name); if (map) { /* * If the map's dso is an offline module, give dso__load() @@ -689,7 +724,7 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start, if (dso == NULL) goto out; - map = map__new2(start, dso, MAP__FUNCTION); + map = map__new2(start, dso); if (map == NULL) goto out; @@ -810,8 +845,8 @@ struct process_args { u64 start; }; -static void machine__get_kallsyms_filename(struct machine *machine, char *buf, - size_t bufsz) +void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz) { if (machine__is_default_guest(machine)) scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms); @@ -854,65 +889,171 @@ static int machine__get_running_kernel_start(struct machine *machine, return 0; } +int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm) +{ + struct kmap *kmap; + struct map *map; + + map = map__new2(xm->start, kernel); + if (!map) + return -1; + + map->end = xm->end; + map->pgoff = xm->pgoff; + + kmap = map__kmap(map); + + kmap->kmaps = &machine->kmaps; + strlcpy(kmap->name, xm->name, KMAP_NAME_LEN); + + map_groups__insert(&machine->kmaps, map); + + pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n", + kmap->name, map->start, map->end); + + map__put(map); + + return 0; +} + +static u64 find_entry_trampoline(struct dso *dso) +{ + /* Duplicates are removed so lookup all aliases */ + const char *syms[] = { + "_entry_trampoline", + "__entry_trampoline_start", + "entry_SYSCALL_64_trampoline", + }; + struct symbol *sym = dso__first_symbol(dso); + unsigned int i; + + for (; sym; sym = dso__next_symbol(sym)) { + if (sym->binding != STB_GLOBAL) + continue; + for (i = 0; i < ARRAY_SIZE(syms); i++) { + if (!strcmp(sym->name, syms[i])) + return sym->start; + } + } + + return 0; +} + +/* + * These values can be used for kernels that do not have symbols for the entry + * trampolines in kallsyms. + */ +#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL +#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000 +#define X86_64_ENTRY_TRAMPOLINE 0x6000 + +/* Map x86_64 PTI entry trampolines */ +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel) +{ + struct map_groups *kmaps = &machine->kmaps; + struct maps *maps = &kmaps->maps; + int nr_cpus_avail, cpu; + bool found = false; + struct map *map; + u64 pgoff; + + /* + * In the vmlinux case, pgoff is a virtual address which must now be + * mapped to a vmlinux offset. + */ + for (map = maps__first(maps); map; map = map__next(map)) { + struct kmap *kmap = __map__kmap(map); + struct map *dest_map; + + if (!kmap || !is_entry_trampoline(kmap->name)) + continue; + + dest_map = map_groups__find(kmaps, map->pgoff); + if (dest_map != map) + map->pgoff = dest_map->map_ip(dest_map, map->pgoff); + found = true; + } + if (found || machine->trampolines_mapped) + return 0; + + pgoff = find_entry_trampoline(kernel); + if (!pgoff) + return 0; + + nr_cpus_avail = machine__nr_cpus_avail(machine); + + /* Add a 1 page map for each CPU's entry trampoline */ + for (cpu = 0; cpu < nr_cpus_avail; cpu++) { + u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU + + cpu * X86_64_CPU_ENTRY_AREA_SIZE + + X86_64_ENTRY_TRAMPOLINE; + struct extra_kernel_map xm = { + .start = va, + .end = va + page_size, + .pgoff = pgoff, + }; + + strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN); + + if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0) + return -1; + } + + machine->trampolines_mapped = nr_cpus_avail; + + return 0; +} + +int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused, + struct dso *kernel __maybe_unused) +{ + return 0; +} + static int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) { - int type; + struct kmap *kmap; + struct map *map; /* In case of renewal the kernel map, destroy previous one */ machine__destroy_kernel_maps(machine); - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct kmap *kmap; - struct map *map; - - machine->vmlinux_maps[type] = map__new2(0, kernel, type); - if (machine->vmlinux_maps[type] == NULL) - return -1; + machine->vmlinux_map = map__new2(0, kernel); + if (machine->vmlinux_map == NULL) + return -1; - machine->vmlinux_maps[type]->map_ip = - machine->vmlinux_maps[type]->unmap_ip = - identity__map_ip; - map = __machine__kernel_map(machine, type); - kmap = map__kmap(map); - if (!kmap) - return -1; + machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip; + map = machine__kernel_map(machine); + kmap = map__kmap(map); + if (!kmap) + return -1; - kmap->kmaps = &machine->kmaps; - map_groups__insert(&machine->kmaps, map); - } + kmap->kmaps = &machine->kmaps; + map_groups__insert(&machine->kmaps, map); return 0; } void machine__destroy_kernel_maps(struct machine *machine) { - int type; - - for (type = 0; type < MAP__NR_TYPES; ++type) { - struct kmap *kmap; - struct map *map = __machine__kernel_map(machine, type); - - if (map == NULL) - continue; + struct kmap *kmap; + struct map *map = machine__kernel_map(machine); - kmap = map__kmap(map); - map_groups__remove(&machine->kmaps, map); - if (kmap && kmap->ref_reloc_sym) { - /* - * ref_reloc_sym is shared among all maps, so free just - * on one of them. - */ - if (type == MAP__FUNCTION) { - zfree((char **)&kmap->ref_reloc_sym->name); - zfree(&kmap->ref_reloc_sym); - } else - kmap->ref_reloc_sym = NULL; - } + if (map == NULL) + return; - map__put(machine->vmlinux_maps[type]); - machine->vmlinux_maps[type] = NULL; + kmap = map__kmap(map); + map_groups__remove(&machine->kmaps, map); + if (kmap && kmap->ref_reloc_sym) { + zfree((char **)&kmap->ref_reloc_sym->name); + zfree(&kmap->ref_reloc_sym); } + + map__zput(machine->vmlinux_map); } int machines__create_guest_kernel_maps(struct machines *machines) @@ -989,32 +1130,31 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid) return machine__create_kernel_maps(machine); } -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type) +int machine__load_kallsyms(struct machine *machine, const char *filename) { struct map *map = machine__kernel_map(machine); int ret = __dso__load_kallsyms(map->dso, filename, map, true); if (ret > 0) { - dso__set_loaded(map->dso, type); + dso__set_loaded(map->dso); /* * Since /proc/kallsyms will have multiple sessions for the * kernel, with modules between them, fixup the end of all * sections. */ - __map_groups__fixup_end(&machine->kmaps, type); + map_groups__fixup_end(&machine->kmaps); } return ret; } -int machine__load_vmlinux_path(struct machine *machine, enum map_type type) +int machine__load_vmlinux_path(struct machine *machine) { struct map *map = machine__kernel_map(machine); int ret = dso__load_vmlinux_path(map->dso, map); if (ret > 0) - dso__set_loaded(map->dso, type); + dso__set_loaded(map->dso); return ret; } @@ -1055,10 +1195,9 @@ static bool is_kmod_dso(struct dso *dso) static int map_groups__set_module_path(struct map_groups *mg, const char *path, struct kmod_path *m) { - struct map *map; char *long_name; + struct map *map = map_groups__find_by_name(mg, m->name); - map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name); if (map == NULL) return 0; @@ -1073,8 +1212,10 @@ static int map_groups__set_module_path(struct map_groups *mg, const char *path, * Full name could reveal us kmod compression, so * we need to update the symtab_type if needed. */ - if (m->comp && is_kmod_dso(map->dso)) + if (m->comp && is_kmod_dso(map->dso)) { map->dso->symtab_type++; + map->dso->comp = m->comp; + } return 0; } @@ -1207,19 +1348,14 @@ static int machine__create_modules(struct machine *machine) static void machine__set_kernel_mmap(struct machine *machine, u64 start, u64 end) { - int i; - - for (i = 0; i < MAP__NR_TYPES; i++) { - machine->vmlinux_maps[i]->start = start; - machine->vmlinux_maps[i]->end = end; - - /* - * Be a bit paranoid here, some perf.data file came with - * a zero sized synthesized MMAP event for the kernel. - */ - if (start == 0 && end == 0) - machine->vmlinux_maps[i]->end = ~0ULL; - } + machine->vmlinux_map->start = start; + machine->vmlinux_map->end = end; + /* + * Be a bit paranoid here, some perf.data file came with + * a zero sized synthesized MMAP event for the kernel. + */ + if (start == 0 && end == 0) + machine->vmlinux_map->end = ~0ULL; } int machine__create_kernel_maps(struct machine *machine) @@ -1234,9 +1370,8 @@ int machine__create_kernel_maps(struct machine *machine) return -1; ret = __machine__create_kernel_maps(machine, kernel); - dso__put(kernel); if (ret < 0) - return -1; + goto out_put; if (symbol_conf.use_modules && machine__create_modules(machine) < 0) { if (machine__is_host(machine)) @@ -1249,9 +1384,10 @@ int machine__create_kernel_maps(struct machine *machine) if (!machine__get_running_kernel_start(machine, &name, &addr)) { if (name && - maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) { machine__destroy_kernel_maps(machine); - return -1; + ret = -1; + goto out_put; } /* we have a real start address now, so re-order the kmaps */ @@ -1267,12 +1403,16 @@ int machine__create_kernel_maps(struct machine *machine) map__put(map); } + if (machine__create_extra_kernel_maps(machine, kernel)) + pr_debug("Problems creating extra kernel maps, continuing anyway...\n"); + /* update end address of the kernel map using adjacent module address */ map = map__next(machine__kernel_map(machine)); if (map) machine__set_kernel_mmap(machine, addr, map->start); - - return 0; +out_put: + dso__put(kernel); + return ret; } static bool machine__uses_kcore(struct machine *machine) @@ -1287,6 +1427,32 @@ static bool machine__uses_kcore(struct machine *machine) return false; } +static bool perf_event__is_extra_kernel_mmap(struct machine *machine, + union perf_event *event) +{ + return machine__is(machine, "x86_64") && + is_entry_trampoline(event->mmap.filename); +} + +static int machine__process_extra_kernel_map(struct machine *machine, + union perf_event *event) +{ + struct map *kernel_map = machine__kernel_map(machine); + struct dso *kernel = kernel_map ? kernel_map->dso : NULL; + struct extra_kernel_map xm = { + .start = event->mmap.start, + .end = event->mmap.start + event->mmap.len, + .pgoff = event->mmap.pgoff, + }; + + if (kernel == NULL) + return -1; + + strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN); + + return machine__create_extra_kernel_map(machine, kernel, &xm); +} + static int machine__process_kernel_mmap_event(struct machine *machine, union perf_event *event) { @@ -1379,9 +1545,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine, * time /proc/sys/kernel/kptr_restrict was non zero. */ if (event->mmap.pgoff != 0) { - maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, - symbol_name, - event->mmap.pgoff); + map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, + symbol_name, + event->mmap.pgoff); } if (machine__is_default_guest(machine)) { @@ -1390,6 +1556,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine, */ dso__load(kernel, machine__kernel_map(machine)); } + } else if (perf_event__is_extra_kernel_mmap(machine, event)) { + return machine__process_extra_kernel_map(machine, event); } return 0; out_problem: @@ -1402,7 +1570,6 @@ int machine__process_mmap2_event(struct machine *machine, { struct thread *thread; struct map *map; - enum map_type type; int ret = 0; if (dump_trace) @@ -1421,11 +1588,6 @@ int machine__process_mmap2_event(struct machine *machine, if (thread == NULL) goto out_problem; - if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) - type = MAP__VARIABLE; - else - type = MAP__FUNCTION; - map = map__new(machine, event->mmap2.start, event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj, @@ -1433,7 +1595,7 @@ int machine__process_mmap2_event(struct machine *machine, event->mmap2.ino_generation, event->mmap2.prot, event->mmap2.flags, - event->mmap2.filename, type, thread); + event->mmap2.filename, thread); if (map == NULL) goto out_problem_map; @@ -1460,7 +1622,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event { struct thread *thread; struct map *map; - enum map_type type; + u32 prot = 0; int ret = 0; if (dump_trace) @@ -1479,16 +1641,14 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event if (thread == NULL) goto out_problem; - if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) - type = MAP__VARIABLE; - else - type = MAP__FUNCTION; + if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA)) + prot = PROT_EXEC; map = map__new(machine, event->mmap.start, event->mmap.len, event->mmap.pgoff, - 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, prot, 0, event->mmap.filename, - type, thread); + thread); if (map == NULL) goto out_problem_map; @@ -1515,7 +1675,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, struct threads *threads = machine__threads(machine, th->tid); if (threads->last_match == th) - threads->last_match = NULL; + threads__set_last_match(threads, NULL); BUG_ON(refcount_read(&th->refcnt) == 0); if (lock) @@ -1548,6 +1708,7 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + bool do_maps_clone = true; int err = 0; if (dump_trace) @@ -1576,9 +1737,25 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); + /* + * When synthesizing FORK events, we are trying to create thread + * objects for the already running tasks on the machine. + * + * Normally, for a kernel FORK event, we want to clone the parent's + * maps because that is what the kernel just did. + * + * But when synthesizing, this should not be done. If we do, we end up + * with overlapping maps as we process the sythesized MMAP2 events that + * get delivered shortly thereafter. + * + * Use the FORK event misc flags in an internal way to signal this + * situation, so we can elide the map clone when appropriate. + */ + if (event->fork.header.misc & PERF_RECORD_MISC_FORK_EXEC) + do_maps_clone = false; if (thread == NULL || parent == NULL || - thread__fork(thread, parent, sample->time) < 0) { + thread__fork(thread, parent, sample->time, do_maps_clone) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); err = -1; } @@ -1664,7 +1841,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); + thread__find_cpumode_addr_location(thread, ip, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -1681,15 +1858,7 @@ static void ip__resolve_data(struct thread *thread, memset(&al, 0, sizeof(al)); - thread__find_addr_location(thread, m, MAP__VARIABLE, addr, &al); - if (al.map == NULL) { - /* - * some shared data regions have execute bit set which puts - * their mapping in the MAP__FUNCTION type array. - * Check there as a fallback option before dropping the sample. - */ - thread__find_addr_location(thread, m, MAP__FUNCTION, addr, &al); - } + thread__find_symbol(thread, m, addr, &al); ams->addr = addr; ams->al_addr = al.addr; @@ -1758,8 +1927,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, MAP__FUNCTION, - ip, &al); + thread__find_cpumode_addr_location(thread, ip, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -1784,8 +1952,7 @@ static int add_callchain_ip(struct thread *thread, } return 0; } - thread__find_addr_location(thread, *cpumode, MAP__FUNCTION, - ip, &al); + thread__find_symbol(thread, *cpumode, ip, &al); } if (al.sym != NULL) { @@ -1810,7 +1977,7 @@ static int add_callchain_ip(struct thread *thread, } srcline = callchain_srcline(al.map, al.sym, al.addr); - return callchain_cursor_append(cursor, al.addr, al.map, al.sym, + return callchain_cursor_append(cursor, ip, al.map, al.sym, branch, flags, nr_loop_iter, iter_cycles, branch_from, srcline); } @@ -1990,6 +2157,27 @@ static int resolve_lbr_callchain_sample(struct thread *thread, return 0; } +static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread, + struct callchain_cursor *cursor, + struct symbol **parent, + struct addr_location *root_al, + u8 *cpumode, int ent) +{ + int err = 0; + + while (--ent >= 0) { + u64 ip = chain->ips[ent]; + + if (ip >= PERF_CONTEXT_MAX) { + err = add_callchain_ip(thread, cursor, parent, + root_al, cpumode, ip, + false, NULL, NULL, 0); + break; + } + } + return err; +} + static int thread__resolve_callchain_sample(struct thread *thread, struct callchain_cursor *cursor, struct perf_evsel *evsel, @@ -2096,6 +2284,12 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: + if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, root_al, + &cpumode, chain->nr - first_call); + if (err) + return (err < 0) ? err : 0; + } for (i = first_call, nr_entries = 0; i < chain_nr && nr_entries < max_stack; i++) { u64 ip; @@ -2110,9 +2304,15 @@ check_calls: continue; #endif ip = chain->ips[j]; - if (ip < PERF_CONTEXT_MAX) ++nr_entries; + else if (callchain_param.order != ORDER_CALLEE) { + err = find_prev_cpumode(chain, thread, cursor, parent, + root_al, &cpumode, j); + if (err) + return (err < 0) ? err : 0; + continue; + } err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, @@ -2136,7 +2336,8 @@ static int append_inlines(struct callchain_cursor *cursor, if (!symbol_conf.inline_name || !map || !sym) return ret; - addr = map__rip_2objdump(map, ip); + addr = map__map_ip(map, ip); + addr = map__rip_2objdump(map, addr); inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr); if (!inline_node) { @@ -2162,6 +2363,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) { struct callchain_cursor *cursor = arg; const char *srcline = NULL; + u64 addr = entry->ip; if (symbol_conf.hide_unresolved && entry->sym == NULL) return 0; @@ -2169,7 +2371,14 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0) return 0; - srcline = callchain_srcline(entry->map, entry->sym, entry->ip); + /* + * Convert entry->ip from a virtual address to an offset in + * its corresponding binary. + */ + if (entry->map) + addr = map__map_ip(entry->map, entry->ip); + + srcline = callchain_srcline(entry->map, entry->sym, addr); return callchain_cursor_append(cursor, entry->ip, entry->map, entry->sym, false, NULL, 0, 0, 0, srcline); @@ -2342,6 +2551,20 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return 0; } +/* + * Compares the raw arch string. N.B. see instead perf_env__arch() if a + * normalized arch is needed. + */ +bool machine__is(struct machine *machine, const char *arch) +{ + return machine && !strcmp(perf_env__raw_arch(machine->env), arch); +} + +int machine__nr_cpus_avail(struct machine *machine) +{ + return machine ? perf_env__nr_cpus_avail(machine->env) : 0; +} + int machine__get_kernel_start(struct machine *machine) { struct map *map = machine__kernel_map(machine); @@ -2358,7 +2581,12 @@ int machine__get_kernel_start(struct machine *machine) machine->kernel_start = 1ULL << 63; if (map) { err = map__load(map); - if (!err) + /* + * On x86_64, PTI entry trampolines are less than the + * start of kernel text, but still above 2^63. So leave + * kernel_start = 1ULL << 63 for x86_64. + */ + if (!err && !machine__is(machine, "x86_64")) machine->kernel_start = map->start; } return err; @@ -2373,7 +2601,7 @@ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, ch { struct machine *machine = vmachine; struct map *map; - struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map); + struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map); if (sym == NULL) return NULL; diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 66cc200ef86f..d856b85862e2 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -49,13 +49,14 @@ struct machine { struct perf_env *env; struct dsos dsos; struct map_groups kmaps; - struct map *vmlinux_maps[MAP__NR_TYPES]; + struct map *vmlinux_map; u64 kernel_start; pid_t *current_tid; union { /* Tool specific area */ void *priv; u64 db_id; }; + bool trampolines_mapped; }; static inline struct threads *machine__threads(struct machine *machine, pid_t tid) @@ -64,16 +65,22 @@ static inline struct threads *machine__threads(struct machine *machine, pid_t ti return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE]; } +/* + * The main kernel (vmlinux) map + */ static inline -struct map *__machine__kernel_map(struct machine *machine, enum map_type type) +struct map *machine__kernel_map(struct machine *machine) { - return machine->vmlinux_maps[type]; + return machine->vmlinux_map; } +/* + * kernel (the one returned by machine__kernel_map()) plus kernel modules maps + */ static inline -struct map *machine__kernel_map(struct machine *machine) +struct maps *machine__kernel_maps(struct machine *machine) { - return __machine__kernel_map(machine, MAP__FUNCTION); + return &machine->kmaps.maps; } int machine__get_kernel_start(struct machine *machine); @@ -182,6 +189,9 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; } +bool machine__is(struct machine *machine, const char *arch); +int machine__nr_cpus_avail(struct machine *machine); + struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); @@ -190,44 +200,27 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); static inline -struct symbol *machine__find_kernel_symbol(struct machine *machine, - enum map_type type, u64 addr, +struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr, struct map **mapp) { - return map_groups__find_symbol(&machine->kmaps, type, addr, mapp); + return map_groups__find_symbol(&machine->kmaps, addr, mapp); } static inline struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine, - enum map_type type, const char *name, + const char *name, struct map **mapp) { - return map_groups__find_symbol_by_name(&machine->kmaps, type, name, mapp); -} - -static inline -struct symbol *machine__find_kernel_function(struct machine *machine, u64 addr, - struct map **mapp) -{ - return machine__find_kernel_symbol(machine, MAP__FUNCTION, addr, - mapp); -} - -static inline -struct symbol *machine__find_kernel_function_by_name(struct machine *machine, - const char *name, - struct map **mapp) -{ - return map_groups__find_function_by_name(&machine->kmaps, name, mapp); + return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp); } struct map *machine__findnew_module_map(struct machine *machine, u64 start, const char *filename); int arch__fix_module_text_start(u64 *start, const char *name); -int machine__load_kallsyms(struct machine *machine, const char *filename, - enum map_type type); -int machine__load_vmlinux_path(struct machine *machine, enum map_type type); +int machine__load_kallsyms(struct machine *machine, const char *filename); + +int machine__load_vmlinux_path(struct machine *machine); size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm); @@ -272,8 +265,29 @@ pid_t machine__get_current_tid(struct machine *machine, int cpu); int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, pid_t tid); /* - * For use with libtraceevent's pevent_set_function_resolver() + * For use with libtraceevent's tep_set_function_resolver() */ char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); +void machine__get_kallsyms_filename(struct machine *machine, char *buf, + size_t bufsz); + +int machine__create_extra_kernel_maps(struct machine *machine, + struct dso *kernel); + +/* Kernel-space maps for symbols that are outside the main kernel map and module maps */ +struct extra_kernel_map { + u64 start; + u64 end; + u64 pgoff; + char name[KMAP_NAME_LEN]; +}; + +int machine__create_extra_kernel_map(struct machine *machine, + struct dso *kernel, + struct extra_kernel_map *xm); + +int machine__map_x86_64_entry_trampolines(struct machine *machine, + struct dso *kernel); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 8fe57031e1a8..354e54550d2b 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -22,11 +22,6 @@ static void __maps__insert(struct maps *maps, struct map *map); -const char *map_type__name[MAP__NR_TYPES] = { - [MAP__FUNCTION] = "Functions", - [MAP__VARIABLE] = "Variables", -}; - static inline int is_anon_memory(const char *filename, u32 flags) { return flags & MAP_HUGETLB || @@ -129,10 +124,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return false; } -void map__init(struct map *map, enum map_type type, - u64 start, u64 end, u64 pgoff, struct dso *dso) +void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso) { - map->type = type; map->start = start; map->end = end; map->pgoff = pgoff; @@ -149,7 +142,7 @@ void map__init(struct map *map, enum map_type type, struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, u32 d_maj, u32 d_min, u64 ino, u64 ino_gen, u32 prot, u32 flags, char *filename, - enum map_type type, struct thread *thread) + struct thread *thread) { struct map *map = malloc(sizeof(*map)); struct nsinfo *nsi = NULL; @@ -173,7 +166,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, map->flags = flags; nsi = nsinfo__get(thread->nsinfo); - if ((anon || no_dso) && nsi && type == MAP__FUNCTION) { + if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) { snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", nsi->pid); filename = newfilename; @@ -203,7 +196,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (dso == NULL) goto out_delete; - map__init(map, type, start, start + len, pgoff, dso); + map__init(map, start, start + len, pgoff, dso); if (anon || no_dso) { map->map_ip = map->unmap_ip = identity__map_ip; @@ -213,8 +206,8 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, * functions still return NULL, and we avoid the * unnecessary map__load warning. */ - if (type != MAP__FUNCTION) - dso__set_loaded(dso, map->type); + if (!(prot & PROT_EXEC)) + dso__set_loaded(dso); } dso->nsinfo = nsi; dso__put(dso); @@ -231,7 +224,7 @@ out_delete: * they are loaded) and for vmlinux, where only after we load all the * symbols we'll know where it starts and ends. */ -struct map *map__new2(u64 start, struct dso *dso, enum map_type type) +struct map *map__new2(u64 start, struct dso *dso) { struct map *map = calloc(1, (sizeof(*map) + (dso->kernel ? sizeof(struct kmap) : 0))); @@ -239,7 +232,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) /* * ->end will be filled after we load all the symbols */ - map__init(map, type, start, 0, 0, dso); + map__init(map, start, 0, 0, dso); } return map; @@ -256,7 +249,19 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) */ bool __map__is_kernel(const struct map *map) { - return __machine__kernel_map(map->groups->machine, map->type) == map; + return machine__kernel_map(map->groups->machine) == map; +} + +bool __map__is_extra_kernel_map(const struct map *map) +{ + struct kmap *kmap = __map__kmap((struct map *)map); + + return kmap && kmap->name[0]; +} + +bool map__has_symbols(const struct map *map) +{ + return dso__has_symbols(map->dso); } static void map__exit(struct map *map) @@ -279,7 +284,7 @@ void map__put(struct map *map) void map__fixup_start(struct map *map) { - struct rb_root *symbols = &map->dso->symbols[map->type]; + struct rb_root *symbols = &map->dso->symbols; struct rb_node *nd = rb_first(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); @@ -289,7 +294,7 @@ void map__fixup_start(struct map *map) void map__fixup_end(struct map *map) { - struct rb_root *symbols = &map->dso->symbols[map->type]; + struct rb_root *symbols = &map->dso->symbols; struct rb_node *nd = rb_last(symbols); if (nd != NULL) { struct symbol *sym = rb_entry(nd, struct symbol, rb_node); @@ -304,7 +309,7 @@ int map__load(struct map *map) const char *name = map->dso->long_name; int nr; - if (dso__loaded(map->dso, map->type)) + if (dso__loaded(map->dso)) return 0; nr = dso__load(map->dso, map); @@ -315,12 +320,11 @@ int map__load(struct map *map) build_id__sprintf(map->dso->build_id, sizeof(map->dso->build_id), sbuild_id); - pr_warning("%s with build id %s not found", - name, sbuild_id); + pr_debug("%s with build id %s not found", name, sbuild_id); } else - pr_warning("Failed to open %s", name); + pr_debug("Failed to open %s", name); - pr_warning(", continuing without symbols\n"); + pr_debug(", continuing without symbols\n"); return -1; } else if (nr == 0) { #ifdef HAVE_LIBELF_SUPPORT @@ -329,12 +333,11 @@ int map__load(struct map *map) if (len > sizeof(DSO__DELETED) && strcmp(name + real_len + 1, DSO__DELETED) == 0) { - pr_warning("%.*s was updated (is prelink enabled?). " + pr_debug("%.*s was updated (is prelink enabled?). " "Restart the long running apps that use it!\n", (int)real_len, name); } else { - pr_warning("no symbols found in %s, maybe install " - "a debug package?\n", name); + pr_debug("no symbols found in %s, maybe install a debug package?\n", name); } #endif return -1; @@ -348,7 +351,7 @@ struct symbol *map__find_symbol(struct map *map, u64 addr) if (map__load(map) < 0) return NULL; - return dso__find_symbol(map->dso, map->type, addr); + return dso__find_symbol(map->dso, addr); } struct symbol *map__find_symbol_by_name(struct map *map, const char *name) @@ -356,10 +359,10 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name) if (map__load(map) < 0) return NULL; - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); + if (!dso__sorted_by_name(map->dso)) + dso__sort_by_name(map->dso); - return dso__find_symbol_by_name(map->dso, map->type, name); + return dso__find_symbol_by_name(map->dso, name); } struct map *map__clone(struct map *from) @@ -376,20 +379,6 @@ struct map *map__clone(struct map *from) return map; } -int map__overlap(struct map *l, struct map *r) -{ - if (l->start > r->start) { - struct map *t = l; - l = r; - r = t; - } - - if (l->end > r->start) - return 1; - - return 0; -} - size_t map__fprintf(struct map *map, FILE *fp) { return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n", @@ -410,16 +399,20 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp) return fprintf(fp, "%s", dsoname); } +char *map__srcline(struct map *map, u64 addr, struct symbol *sym) +{ + if (map == NULL) + return SRCLINE_UNKNOWN; + return get_srcline(map->dso, map__rip_2objdump(map, addr), sym, true, true, addr); +} + int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp) { - char *srcline; int ret = 0; if (map && map->dso) { - srcline = get_srcline(map->dso, - map__rip_2objdump(map, addr), NULL, - true, true, addr); + char *srcline = map__srcline(map, addr, NULL); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); @@ -440,6 +433,20 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, */ u64 map__rip_2objdump(struct map *map, u64 rip) { + struct kmap *kmap = __map__kmap(map); + + /* + * vmlinux does not have program headers for PTI entry trampolines and + * kcore may not either. However the trampoline object code is on the + * main kernel map, so just use that instead. + */ + if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps && kmap->kmaps->machine) { + struct map *kernel_map = machine__kernel_map(kmap->kmaps->machine); + + if (kernel_map) + map = kernel_map; + } + if (!map->dso->adjust_symbols) return rip; @@ -494,10 +501,7 @@ static void maps__init(struct maps *maps) void map_groups__init(struct map_groups *mg, struct machine *machine) { - int i; - for (i = 0; i < MAP__NR_TYPES; ++i) { - maps__init(&mg->maps[i]); - } + maps__init(&mg->maps); mg->machine = machine; refcount_set(&mg->refcnt, 1); } @@ -525,22 +529,12 @@ static void maps__exit(struct maps *maps) void map_groups__exit(struct map_groups *mg) { - int i; - - for (i = 0; i < MAP__NR_TYPES; ++i) - maps__exit(&mg->maps[i]); + maps__exit(&mg->maps); } bool map_groups__empty(struct map_groups *mg) { - int i; - - for (i = 0; i < MAP__NR_TYPES; ++i) { - if (maps__first(&mg->maps[i])) - return false; - } - - return true; + return !maps__first(&mg->maps); } struct map_groups *map_groups__new(struct machine *machine) @@ -566,10 +560,9 @@ void map_groups__put(struct map_groups *mg) } struct symbol *map_groups__find_symbol(struct map_groups *mg, - enum map_type type, u64 addr, - struct map **mapp) + u64 addr, struct map **mapp) { - struct map *map = map_groups__find(mg, type, addr); + struct map *map = map_groups__find(mg, addr); /* Ensure map is loaded before using map->map_ip */ if (map != NULL && map__load(map) >= 0) { @@ -581,6 +574,13 @@ struct symbol *map_groups__find_symbol(struct map_groups *mg, return NULL; } +static bool map__contains_symbol(struct map *map, struct symbol *sym) +{ + u64 ip = map->unmap_ip(map, sym->start); + + return ip >= map->start && ip < map->end; +} + struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { @@ -596,6 +596,10 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, if (sym == NULL) continue; + if (!map__contains_symbol(pos, sym)) { + sym = NULL; + continue; + } if (mapp != NULL) *mapp = pos; goto out; @@ -608,13 +612,10 @@ out: } struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - enum map_type type, const char *name, struct map **mapp) { - struct symbol *sym = maps__find_symbol_by_name(&mg->maps[type], name, mapp); - - return sym; + return maps__find_symbol_by_name(&mg->maps, name, mapp); } int map_groups__find_ams(struct addr_map_symbol *ams) @@ -622,8 +623,7 @@ int map_groups__find_ams(struct addr_map_symbol *ams) if (ams->addr < ams->map->start || ams->addr >= ams->map->end) { if (ams->map->groups == NULL) return -1; - ams->map = map_groups__find(ams->map->groups, ams->map->type, - ams->addr); + ams->map = map_groups__find(ams->map->groups, ams->addr); if (ams->map == NULL) return -1; } @@ -646,7 +646,7 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); if (verbose > 2) { - printed += dso__fprintf(pos->dso, pos->type, fp); + printed += dso__fprintf(pos->dso, fp); printed += fprintf(fp, "--\n"); } } @@ -656,50 +656,61 @@ static size_t maps__fprintf(struct maps *maps, FILE *fp) return printed; } -size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, - FILE *fp) -{ - size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); - return printed += maps__fprintf(&mg->maps[type], fp); -} - size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) { - size_t printed = 0, i; - for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __map_groups__fprintf_maps(mg, i, fp); - return printed; + return maps__fprintf(&mg->maps, fp); } static void __map_groups__insert(struct map_groups *mg, struct map *map) { - __maps__insert(&mg->maps[map->type], map); + __maps__insert(&mg->maps, map); map->groups = mg; } static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) { struct rb_root *root; - struct rb_node *next; + struct rb_node *next, *first; int err = 0; down_write(&maps->lock); root = &maps->entries; - next = rb_first(root); + /* + * Find first map where end > map->start. + * Same as find_vma() in kernel. + */ + next = root->rb_node; + first = NULL; + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node); + + if (pos->end > map->start) { + first = next; + if (pos->start <= map->start) + break; + next = next->rb_left; + } else + next = next->rb_right; + } + + next = first; while (next) { struct map *pos = rb_entry(next, struct map, rb_node); next = rb_next(&pos->rb_node); - if (!map__overlap(pos, map)) - continue; + /* + * Stop if current map starts after map->end. + * Maps are ordered by start: next will not overlap for sure. + */ + if (pos->start >= map->end) + break; if (verbose >= 2) { if (use_browser) { - pr_warning("overlapping maps in %s " - "(disable tui for more info)\n", + pr_debug("overlapping maps in %s (disable tui for more info)\n", map->dso->name); } else { fputs("overlapping maps:\n", fp); @@ -758,19 +769,18 @@ out: int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp) { - return maps__fixup_overlappings(&mg->maps[map->type], map, fp); + return maps__fixup_overlappings(&mg->maps, map, fp); } /* * XXX This should not really _copy_ te maps, but refcount them. */ -int map_groups__clone(struct thread *thread, - struct map_groups *parent, enum map_type type) +int map_groups__clone(struct thread *thread, struct map_groups *parent) { struct map_groups *mg = thread->mg; int err = -ENOMEM; struct map *map; - struct maps *maps = &parent->maps[type]; + struct maps *maps = &parent->maps; down_read(&maps->lock); @@ -877,15 +887,22 @@ struct map *map__next(struct map *map) return NULL; } -struct kmap *map__kmap(struct map *map) +struct kmap *__map__kmap(struct map *map) { - if (!map->dso || !map->dso->kernel) { - pr_err("Internal error: map__kmap with a non-kernel map\n"); + if (!map->dso || !map->dso->kernel) return NULL; - } return (struct kmap *)(map + 1); } +struct kmap *map__kmap(struct map *map) +{ + struct kmap *kmap = __map__kmap(map); + + if (!kmap) + pr_err("Internal error: map__kmap with a non-kernel map\n"); + return kmap; +} + struct map_groups *map__kmaps(struct map *map) { struct kmap *kmap = map__kmap(map); diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e9bbe01b0ab..e0f327b51e66 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -8,19 +8,11 @@ #include <linux/rbtree.h> #include <pthread.h> #include <stdio.h> +#include <string.h> #include <stdbool.h> #include <linux/types.h> #include "rwsem.h" -enum map_type { - MAP__FUNCTION = 0, - MAP__VARIABLE, -}; - -#define MAP__NR_TYPES (MAP__VARIABLE + 1) - -extern const char *map_type__name[MAP__NR_TYPES]; - struct dso; struct ip_callchain; struct ref_reloc_sym; @@ -35,7 +27,6 @@ struct map { }; u64 start; u64 end; - u8 /* enum map_type */ type; bool erange_warned; u32 priv; u32 prot; @@ -56,9 +47,12 @@ struct map { refcount_t refcnt; }; +#define KMAP_NAME_LEN 256 + struct kmap { struct ref_reloc_sym *ref_reloc_sym; struct map_groups *kmaps; + char name[KMAP_NAME_LEN]; }; struct maps { @@ -67,7 +61,7 @@ struct maps { }; struct map_groups { - struct maps maps[MAP__NR_TYPES]; + struct maps maps; struct machine *machine; refcount_t refcnt; }; @@ -85,6 +79,7 @@ static inline struct map_groups *map_groups__get(struct map_groups *mg) void map_groups__put(struct map_groups *mg); +struct kmap *__map__kmap(struct map *map); struct kmap *map__kmap(struct map *map); struct map_groups *map__kmaps(struct map *map); @@ -125,7 +120,7 @@ struct thread; * Note: caller must ensure map->dso is not NULL (map is loaded). */ #define map__for_each_symbol(map, pos, n) \ - dso__for_each_symbol(map->dso, pos, n, map->type) + dso__for_each_symbol(map->dso, pos, n) /* map__for_each_symbol_with_name - iterate over the symbols in the given map * that have the given name @@ -144,13 +139,13 @@ struct thread; #define map__for_each_symbol_by_name(map, sym_name, pos) \ __map__for_each_symbol_by_name(map, sym_name, (pos)) -void map__init(struct map *map, enum map_type type, +void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct machine *machine, u64 start, u64 len, u64 pgoff, u32 d_maj, u32 d_min, u64 ino, u64 ino_gen, u32 prot, u32 flags, - char *filename, enum map_type type, struct thread *thread); -struct map *map__new2(u64 start, struct dso *dso, enum map_type type); + char *filename, struct thread *thread); +struct map *map__new2(u64 start, struct dso *dso); void map__delete(struct map *map); struct map *map__clone(struct map *map); @@ -171,9 +166,9 @@ static inline void __map__zput(struct map **map) #define map__zput(map) __map__zput(&map) -int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); +char *map__srcline(struct map *map, u64 addr, struct symbol *sym); int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, FILE *fp); @@ -185,8 +180,6 @@ void map__fixup_end(struct map *map); void map__reloc_vmlinux(struct map *map); -size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, - FILE *fp); void maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); struct map *maps__find(struct maps *maps, u64 addr); @@ -197,34 +190,29 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct thread *thread, - struct map_groups *parent, enum map_type type); + struct map_groups *parent); size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); -int maps__set_kallsyms_ref_reloc_sym(struct map **maps, const char *symbol_name, - u64 addr); +int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, + u64 addr); static inline void map_groups__insert(struct map_groups *mg, struct map *map) { - maps__insert(&mg->maps[map->type], map); + maps__insert(&mg->maps, map); map->groups = mg; } static inline void map_groups__remove(struct map_groups *mg, struct map *map) { - maps__remove(&mg->maps[map->type], map); + maps__remove(&mg->maps, map); } -static inline struct map *map_groups__find(struct map_groups *mg, - enum map_type type, u64 addr) +static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) { - return maps__find(&mg->maps[type], addr); + return maps__find(&mg->maps, addr); } -static inline struct map *map_groups__first(struct map_groups *mg, - enum map_type type) -{ - return maps__first(&mg->maps[type]); -} +struct map *map_groups__first(struct map_groups *mg); static inline struct map *map_groups__next(struct map *map) { @@ -232,11 +220,9 @@ static inline struct map *map_groups__next(struct map *map) } struct symbol *map_groups__find_symbol(struct map_groups *mg, - enum map_type type, u64 addr, - struct map **mapp); + u64 addr, struct map **mapp); struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, - enum map_type type, const char *name, struct map **mapp); @@ -244,24 +230,26 @@ struct addr_map_symbol; int map_groups__find_ams(struct addr_map_symbol *ams); -static inline -struct symbol *map_groups__find_function_by_name(struct map_groups *mg, - const char *name, struct map **mapp) -{ - return map_groups__find_symbol_by_name(mg, MAP__FUNCTION, name, mapp); -} - int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); -struct map *map_groups__find_by_name(struct map_groups *mg, - enum map_type type, const char *name); +struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); bool __map__is_kernel(const struct map *map); +bool __map__is_extra_kernel_map(const struct map *map); static inline bool __map__is_kmodule(const struct map *map) { - return !__map__is_kernel(map); + return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map); +} + +bool map__has_symbols(const struct map *map); + +#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline" + +static inline bool is_entry_trampoline(const char *name) +{ + return !strcmp(name, ENTRY_TRAMPOLINE_NAME); } #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 1ddc3d1d0147..a28f9b5cc4ff 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -326,8 +326,8 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, if (raw) s = (char *)pe->metric_name; else { - if (asprintf(&s, "%s\n\t[%s]", - pe->metric_name, pe->desc) < 0) + if (asprintf(&s, "%s\n%*s%s]", + pe->metric_name, 8, "[", pe->desc) < 0) return; } @@ -490,3 +490,25 @@ out: metricgroup__free_egroups(&group_list); return ret; } + +bool metricgroup__has_metric(const char *metric) +{ + struct pmu_events_map *map = perf_pmu__find_map(NULL); + struct pmu_event *pe; + int i; + + if (!map) + return false; + + for (i = 0; ; i++) { + pe = &map->table[i]; + + if (!pe->name && !pe->metric_group && !pe->metric_name) + break; + if (!pe->metric_expr) + continue; + if (match_metric(pe->metric_name, metric)) + return true; + } + return false; +} diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 06854e125ee7..8a155dba0581 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -28,4 +28,5 @@ int metricgroup__parse_groups(const struct option *opt, struct rblist *metric_events); void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); +bool metricgroup__has_metric(const char *metric); #endif diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index fc832676a798..cdb95b3a1213 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -164,7 +164,7 @@ void perf_mmap__munmap(struct perf_mmap *map) auxtrace_mmap__munmap(&map->auxtrace_mmap); } -int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) +int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) { /* * The last one will be done at perf_mmap__consume(), so that we @@ -191,6 +191,7 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return -1; } map->fd = fd; + map->cpu = cpu; if (auxtrace_mmap__mmap(&map->auxtrace_mmap, &mp->auxtrace_mp, map->base, fd)) @@ -280,7 +281,7 @@ int perf_mmap__read_init(struct perf_mmap *map) } int perf_mmap__push(struct perf_mmap *md, void *to, - int push(void *to, void *buf, size_t size)) + int push(struct perf_mmap *map, void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); unsigned char *data = md->base + page_size; @@ -299,7 +300,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to, size = md->mask + 1 - (md->start & md->mask); md->start += size; - if (push(to, buf, size) < 0) { + if (push(md, to, buf, size) < 0) { rc = -1; goto out; } @@ -309,7 +310,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to, size = md->end - md->start; md->start += size; - if (push(to, buf, size) < 0) { + if (push(md, to, buf, size) < 0) { rc = -1; goto out; } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index d82294db1295..cc5e2d6d17a9 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -4,7 +4,7 @@ #include <linux/compiler.h> #include <linux/refcount.h> #include <linux/types.h> -#include <asm/barrier.h> +#include <linux/ring_buffer.h> #include <stdbool.h> #include "auxtrace.h" #include "event.h" @@ -18,6 +18,7 @@ struct perf_mmap { void *base; int mask; int fd; + int cpu; refcount_t refcnt; u64 prev; u64 start; @@ -60,7 +61,7 @@ struct mmap_params { struct auxtrace_mmap_params auxtrace_mp; }; -int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd); +int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu); void perf_mmap__munmap(struct perf_mmap *map); void perf_mmap__get(struct perf_mmap *map); @@ -70,21 +71,12 @@ void perf_mmap__consume(struct perf_mmap *map); static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { - struct perf_event_mmap_page *pc = mm->base; - u64 head = READ_ONCE(pc->data_head); - rmb(); - return head; + return ring_buffer_read_head(mm->base); } static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { - struct perf_event_mmap_page *pc = md->base; - - /* - * ensure all reads are done before we write the tail out. - */ - mb(); - pc->data_tail = tail; + ring_buffer_write_tail(md->base, tail); } union perf_event *perf_mmap__read_forward(struct perf_mmap *map); @@ -92,7 +84,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_event(struct perf_mmap *map); int perf_mmap__push(struct perf_mmap *md, void *to, - int push(void *to, void *buf, size_t size)); + int push(struct perf_mmap *map, void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c index 5be021701f34..aed170bd4384 100644 --- a/tools/perf/util/namespaces.c +++ b/tools/perf/util/namespaces.c @@ -18,6 +18,7 @@ #include <stdio.h> #include <string.h> #include <unistd.h> +#include <asm/bug.h> struct namespaces *namespaces__new(struct namespaces_event *event) { @@ -139,6 +140,9 @@ struct nsinfo *nsinfo__copy(struct nsinfo *nsi) { struct nsinfo *nnsi; + if (nsi == NULL) + return NULL; + nnsi = calloc(1, sizeof(*nnsi)); if (nnsi != NULL) { nnsi->pid = nsi->pid; @@ -183,6 +187,7 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, char curpath[PATH_MAX]; int oldns = -1; int newns = -1; + char *oldcwd = NULL; if (nc == NULL) return; @@ -196,9 +201,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX) return; + oldcwd = get_current_dir_name(); + if (!oldcwd) + return; + oldns = open(curpath, O_RDONLY); if (oldns < 0) - return; + goto errout; newns = open(nsi->mntns_path, O_RDONLY); if (newns < 0) @@ -207,11 +216,13 @@ void nsinfo__mountns_enter(struct nsinfo *nsi, if (setns(newns, CLONE_NEWNS) < 0) goto errout; + nc->oldcwd = oldcwd; nc->oldns = oldns; nc->newns = newns; return; errout: + free(oldcwd); if (oldns > -1) close(oldns); if (newns > -1) @@ -220,11 +231,16 @@ errout: void nsinfo__mountns_exit(struct nscookie *nc) { - if (nc == NULL || nc->oldns == -1 || nc->newns == -1) + if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd) return; setns(nc->oldns, CLONE_NEWNS); + if (nc->oldcwd) { + WARN_ON_ONCE(chdir(nc->oldcwd)); + zfree(&nc->oldcwd); + } + if (nc->oldns > -1) { close(nc->oldns); nc->oldns = -1; diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h index 760558dcfd18..d5f46c09ea31 100644 --- a/tools/perf/util/namespaces.h +++ b/tools/perf/util/namespaces.h @@ -10,6 +10,7 @@ #define __PERF_NAMESPACES_H #include <sys/types.h> +#include <linux/stddef.h> #include <linux/perf_event.h> #include <linux/refcount.h> #include <linux/types.h> @@ -37,6 +38,7 @@ struct nsinfo { struct nscookie { int oldns; int newns; + char *oldcwd; }; int nsinfo__init(struct nsinfo *nsi); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index bad9e0296e9a..1904e7f6ec84 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -80,14 +80,20 @@ static union perf_event *dup_event(struct ordered_events *oe, return oe->copy_on_queue ? __dup_event(oe, event) : event; } -static void free_dup_event(struct ordered_events *oe, union perf_event *event) +static void __free_dup_event(struct ordered_events *oe, union perf_event *event) { - if (event && oe->copy_on_queue) { + if (event) { oe->cur_alloc_size -= event->header.size; free(event); } } +static void free_dup_event(struct ordered_events *oe, union perf_event *event) +{ + if (oe->copy_on_queue) + __free_dup_event(oe, event); +} + #define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct ordered_event)) static struct ordered_event *alloc_event(struct ordered_events *oe, union perf_event *event) @@ -95,21 +101,49 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, struct list_head *cache = &oe->cache; struct ordered_event *new = NULL; union perf_event *new_event; + size_t size; new_event = dup_event(oe, event); if (!new_event) return NULL; + /* + * We maintain the following scheme of buffers for ordered + * event allocation: + * + * to_free list -> buffer1 (64K) + * buffer2 (64K) + * ... + * + * Each buffer keeps an array of ordered events objects: + * buffer -> event[0] + * event[1] + * ... + * + * Each allocated ordered event is linked to one of + * following lists: + * - time ordered list 'events' + * - list of currently removed events 'cache' + * + * Allocation of the ordered event uses the following order + * to get the memory: + * - use recently removed object from 'cache' list + * - use available object in current allocation buffer + * - allocate new buffer if the current buffer is full + * + * Removal of ordered event object moves it from events to + * the cache list. + */ + size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new); + if (!list_empty(cache)) { new = list_entry(cache->next, struct ordered_event, list); list_del(&new->list); } else if (oe->buffer) { - new = oe->buffer + oe->buffer_idx; + new = &oe->buffer->event[oe->buffer_idx]; if (++oe->buffer_idx == MAX_SAMPLE_BUFFER) oe->buffer = NULL; - } else if (oe->cur_alloc_size < oe->max_alloc_size) { - size_t size = MAX_SAMPLE_BUFFER * sizeof(*new); - + } else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) { oe->buffer = malloc(size); if (!oe->buffer) { free_dup_event(oe, new_event); @@ -122,11 +156,11 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, oe->cur_alloc_size += size; list_add(&oe->buffer->list, &oe->to_free); - /* First entry is abused to maintain the to_free list. */ - oe->buffer_idx = 2; - new = oe->buffer + 1; + oe->buffer_idx = 1; + new = &oe->buffer->event[0]; } else { pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size); + return NULL; } new->event = new_event; @@ -300,15 +334,38 @@ void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t d oe->deliver = deliver; } +static void +ordered_events_buffer__free(struct ordered_events_buffer *buffer, + unsigned int max, struct ordered_events *oe) +{ + if (oe->copy_on_queue) { + unsigned int i; + + for (i = 0; i < max; i++) + __free_dup_event(oe, buffer->event[i].event); + } + + free(buffer); +} + void ordered_events__free(struct ordered_events *oe) { - while (!list_empty(&oe->to_free)) { - struct ordered_event *event; + struct ordered_events_buffer *buffer, *tmp; - event = list_entry(oe->to_free.next, struct ordered_event, list); - list_del(&event->list); - free_dup_event(oe, event->event); - free(event); + if (list_empty(&oe->to_free)) + return; + + /* + * Current buffer might not have all the events allocated + * yet, we need to free only allocated ones ... + */ + list_del(&oe->buffer->list); + ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe); + + /* ... and continue with the rest */ + list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) { + list_del(&buffer->list); + ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe); } } diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 8c7a2948593e..1338d5c345dc 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -25,23 +25,28 @@ struct ordered_events; typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, struct ordered_event *event); +struct ordered_events_buffer { + struct list_head list; + struct ordered_event event[0]; +}; + struct ordered_events { - u64 last_flush; - u64 next_flush; - u64 max_timestamp; - u64 max_alloc_size; - u64 cur_alloc_size; - struct list_head events; - struct list_head cache; - struct list_head to_free; - struct ordered_event *buffer; - struct ordered_event *last; - ordered_events__deliver_t deliver; - int buffer_idx; - unsigned int nr_events; - enum oe_flush last_flush_type; - u32 nr_unordered_events; - bool copy_on_queue; + u64 last_flush; + u64 next_flush; + u64 max_timestamp; + u64 max_alloc_size; + u64 cur_alloc_size; + struct list_head events; + struct list_head cache; + struct list_head to_free; + struct ordered_events_buffer *buffer; + struct ordered_event *last; + ordered_events__deliver_t deliver; + int buffer_idx; + unsigned int nr_events; + enum oe_flush last_flush_type; + u32 nr_unordered_events; + bool copy_on_queue; }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2fc4ee8b86c1..59be3466d64d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -156,13 +156,12 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { (strcmp(sys_dirent->d_name, ".")) && \ (strcmp(sys_dirent->d_name, ".."))) -static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) +static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir) { char evt_path[MAXPATHLEN]; int fd; - snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", tracing_events_path, - sys_dir->d_name, evt_dir->d_name); + snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) return -EINVAL; @@ -171,12 +170,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) return 0; } -#define for_each_event(sys_dirent, evt_dir, evt_dirent) \ +#define for_each_event(dir_path, evt_dir, evt_dirent) \ while ((evt_dirent = readdir(evt_dir)) != NULL) \ if (evt_dirent->d_type == DT_DIR && \ (strcmp(evt_dirent->d_name, ".")) && \ (strcmp(evt_dirent->d_name, "..")) && \ - (!tp_event_has_id(sys_dirent, evt_dirent))) + (!tp_event_has_id(dir_path, evt_dirent))) #define MAX_EVENT_LENGTH 512 @@ -190,21 +189,21 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) int fd; u64 id; char evt_path[MAXPATHLEN]; - char dir_path[MAXPATHLEN]; + char *dir_path; - sys_dir = opendir(tracing_events_path); + sys_dir = tracing_events__opendir(); if (!sys_dir) return NULL; for_each_subsystem(sys_dir, sys_dirent) { - - snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent->d_name); + dir_path = get_events_file(sys_dirent->d_name); + if (!dir_path) + continue; evt_dir = opendir(dir_path); if (!evt_dir) - continue; + goto next; - for_each_event(sys_dirent, evt_dir, evt_dirent) { + for_each_event(dir_path, evt_dir, evt_dirent) { scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dirent->d_name); @@ -218,6 +217,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) close(fd); id = atoll(id_buf); if (id == config) { + put_events_file(dir_path); closedir(evt_dir); closedir(sys_dir); path = zalloc(sizeof(*path)); @@ -242,6 +242,8 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) } } closedir(evt_dir); +next: + put_events_file(dir_path); } closedir(sys_dir); @@ -512,14 +514,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, struct parse_events_error *err, struct list_head *head_config) { - char evt_path[MAXPATHLEN]; + char *evt_path; struct dirent *evt_ent; DIR *evt_dir; int ret = 0, found = 0; - snprintf(evt_path, MAXPATHLEN, "%s/%s", tracing_events_path, sys_name); + evt_path = get_events_file(sys_name); + if (!evt_path) { + tracepoint_error(err, errno, sys_name, evt_name); + return -1; + } evt_dir = opendir(evt_path); if (!evt_dir) { + put_events_file(evt_path); tracepoint_error(err, errno, sys_name, evt_name); return -1; } @@ -545,6 +552,7 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx, ret = -1; } + put_events_file(evt_path); closedir(evt_dir); return ret; } @@ -570,7 +578,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, DIR *events_dir; int ret = 0; - events_dir = opendir(tracing_events_path); + events_dir = tracing_events__opendir(); if (!events_dir) { tracepoint_error(err, errno, sys_name, evt_name); return -1; @@ -918,6 +926,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit", [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit", [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack", + [PARSE_EVENTS__TERM_TYPE_MAX_EVENTS] = "nr", [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", @@ -1029,6 +1038,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + CHECK_TYPE_VAL(NUM); + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1076,6 +1088,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_INHERIT: case PARSE_EVENTS__TERM_TYPE_NOINHERIT: case PARSE_EVENTS__TERM_TYPE_MAX_STACK: + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: case PARSE_EVENTS__TERM_TYPE_OVERWRITE: case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: return config_term_common(attr, term, err); @@ -1154,6 +1167,9 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num); break; + case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: + ADD_CONFIG_TERM(MAX_EVENTS, max_events, term->val.num); + break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0); break; @@ -1983,8 +1999,11 @@ static int set_filter(struct perf_evsel *evsel, const void *arg) int nr_addr_filters = 0; struct perf_pmu *pmu = NULL; - if (evsel == NULL) - goto err; + if (evsel == NULL) { + fprintf(stderr, + "--filter option should follow a -e tracepoint or HW tracer option\n"); + return -1; + } if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { if (perf_evsel__append_tp_filter(evsel, str) < 0) { @@ -2006,8 +2025,11 @@ static int set_filter(struct perf_evsel *evsel, const void *arg) perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters); - if (!nr_addr_filters) - goto err; + if (!nr_addr_filters) { + fprintf(stderr, + "This CPU does not support address filtering\n"); + return -1; + } if (perf_evsel__append_addr_filter(evsel, str) < 0) { fprintf(stderr, @@ -2016,12 +2038,6 @@ static int set_filter(struct perf_evsel *evsel, const void *arg) } return 0; - -err: - fprintf(stderr, - "--filter option should follow a -e tracepoint or HW tracer option\n"); - - return -1; } int parse_filter(const struct option *opt, const char *str, @@ -2092,13 +2108,13 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, DIR *sys_dir, *evt_dir; struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; - char dir_path[MAXPATHLEN]; + char *dir_path; char **evt_list = NULL; unsigned int evt_i = 0, evt_num = 0; bool evt_num_known = false; restart: - sys_dir = opendir(tracing_events_path); + sys_dir = tracing_events__opendir(); if (!sys_dir) return; @@ -2113,13 +2129,14 @@ restart: !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; - snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent->d_name); + dir_path = get_events_file(sys_dirent->d_name); + if (!dir_path) + continue; evt_dir = opendir(dir_path); if (!evt_dir) - continue; + goto next; - for_each_event(sys_dirent, evt_dir, evt_dirent) { + for_each_event(dir_path, evt_dir, evt_dirent) { if (event_glob != NULL && !strglobmatch(evt_dirent->d_name, event_glob)) continue; @@ -2133,11 +2150,15 @@ restart: sys_dirent->d_name, evt_dirent->d_name); evt_list[evt_i] = strdup(evt_path); - if (evt_list[evt_i] == NULL) + if (evt_list[evt_i] == NULL) { + put_events_file(dir_path); goto out_close_evt_dir; + } evt_i++; } closedir(evt_dir); +next: + put_events_file(dir_path); } closedir(sys_dir); @@ -2185,21 +2206,21 @@ int is_valid_tracepoint(const char *event_string) DIR *sys_dir, *evt_dir; struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; - char dir_path[MAXPATHLEN]; + char *dir_path; - sys_dir = opendir(tracing_events_path); + sys_dir = tracing_events__opendir(); if (!sys_dir) return 0; for_each_subsystem(sys_dir, sys_dirent) { - - snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent->d_name); + dir_path = get_events_file(sys_dirent->d_name); + if (!dir_path) + continue; evt_dir = opendir(dir_path); if (!evt_dir) - continue; + goto next; - for_each_event(sys_dirent, evt_dir, evt_dirent) { + for_each_event(dir_path, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); if (!strcmp(evt_path, event_string)) { @@ -2209,6 +2230,8 @@ int is_valid_tracepoint(const char *event_string) } } closedir(evt_dir); +next: + put_events_file(dir_path); } closedir(sys_dir); return 0; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 4473dac27aee..5ed035cbcbb7 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -71,6 +71,7 @@ enum { PARSE_EVENTS__TERM_TYPE_NOINHERIT, PARSE_EVENTS__TERM_TYPE_INHERIT, PARSE_EVENTS__TERM_TYPE_MAX_STACK, + PARSE_EVENTS__TERM_TYPE_MAX_EVENTS, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, PARSE_EVENTS__TERM_TYPE_DRV_CFG, diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index a1a01b1ac8b8..7805c71aaae2 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -53,7 +53,21 @@ static int str(yyscan_t scanner, int token) YYSTYPE *yylval = parse_events_get_lval(scanner); char *text = parse_events_get_text(scanner); - yylval->str = strdup(text); + if (text[0] != '\'') { + yylval->str = strdup(text); + } else { + /* + * If a text tag specified on the command line + * contains opening single quite ' then it is + * expected that the tag ends with single quote + * as well, like this: + * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\' + * quotes need to be escaped to bypass shell + * processing. + */ + yylval->str = strndup(&text[1], strlen(text) - 2); + } + return token; } @@ -176,6 +190,7 @@ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]* +name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\'] name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]* drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? /* If you add a modifier you need to update check_modifier() */ @@ -254,6 +269,7 @@ time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); } +nr { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_EVENTS); } inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } @@ -344,6 +360,7 @@ r{num_raw_hex} { return raw(yyscanner); } {bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); } {bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); } {name} { return pmu_str_check(yyscanner); } +{name_tag} { return str(yyscanner, PE_NAME); } "/" { BEGIN(config); return '/'; } - { return '-'; } , { BEGIN(event); return ','; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index e37608a87dba..da8fe57691b8 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -73,6 +73,7 @@ static void inc_group_count(struct list_head *list, %type <num> value_sym %type <head> event_config %type <head> opt_event_config +%type <head> opt_pmu_config %type <term> event_term %type <head> event_pmu %type <head> event_legacy_symbol @@ -224,13 +225,18 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME opt_event_config +PE_NAME opt_pmu_config { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list, *orig_terms, *terms; if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; + if (error) + error->idx = @1.first_column; + ALLOC_LIST(list); if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) { struct perf_pmu *pmu = NULL; @@ -496,6 +502,17 @@ opt_event_config: $$ = NULL; } +opt_pmu_config: +'/' event_config '/' +{ + $$ = $2; +} +| +'/' '/' +{ + $$ = NULL; +} + start_terms: event_config { struct parse_events_state *parse_state = _parse_state; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d2fb597c9a8c..7e49baad304d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -234,6 +234,74 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } +static void perf_pmu_assign_str(char *name, const char *field, char **old_str, + char **new_str) +{ + if (!*old_str) + goto set_new; + + if (*new_str) { /* Have new string, check with old */ + if (strcasecmp(*old_str, *new_str)) + pr_debug("alias %s differs in field '%s'\n", + name, field); + zfree(old_str); + } else /* Nothing new --> keep old string */ + return; +set_new: + *old_str = *new_str; + *new_str = NULL; +} + +static void perf_pmu_update_alias(struct perf_pmu_alias *old, + struct perf_pmu_alias *newalias) +{ + perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc); + perf_pmu_assign_str(old->name, "long_desc", &old->long_desc, + &newalias->long_desc); + perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic); + perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr, + &newalias->metric_expr); + perf_pmu_assign_str(old->name, "metric_name", &old->metric_name, + &newalias->metric_name); + perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str); + old->scale = newalias->scale; + old->per_pkg = newalias->per_pkg; + old->snapshot = newalias->snapshot; + memcpy(old->unit, newalias->unit, sizeof(old->unit)); +} + +/* Delete an alias entry. */ +static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +{ + zfree(&newalias->name); + zfree(&newalias->desc); + zfree(&newalias->long_desc); + zfree(&newalias->topic); + zfree(&newalias->str); + zfree(&newalias->metric_expr); + zfree(&newalias->metric_name); + parse_events_terms__purge(&newalias->terms); + free(newalias); +} + +/* Merge an alias, search in alias list. If this name is already + * present merge both of them to combine all information. + */ +static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, + struct list_head *alist) +{ + struct perf_pmu_alias *a; + + list_for_each_entry(a, alist, list) { + if (!strcasecmp(newalias->name, a->name)) { + perf_pmu_update_alias(a, newalias); + perf_pmu_free_alias(newalias); + return true; + } + } + return false; +} + static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, @@ -241,9 +309,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *metric_expr, char *metric_name) { + struct parse_events_term *term; struct perf_pmu_alias *alias; int ret; int num; + char newval[256]; alias = malloc(sizeof(*alias)); if (!alias) @@ -262,6 +332,27 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, return ret; } + /* Scan event and remove leading zeroes, spaces, newlines, some + * platforms have terms specified as + * event=0x0091 (read from files ../<PMU>/events/<FILE> + * and terms specified as event=0x91 (read from JSON files). + * + * Rebuild string to make alias->str member comparable. + */ + memset(newval, 0, sizeof(newval)); + ret = 0; + list_for_each_entry(term, &alias->terms, list) { + if (ret) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + ","); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + "%s=%#x", term->config, term->val.num); + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + "%s=%s", term->config, term->val.str); + } + alias->name = strdup(name); if (dir) { /* @@ -285,9 +376,10 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, snprintf(alias->unit, sizeof(alias->unit), "%s", unit); } alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; - alias->str = strdup(val); + alias->str = strdup(newval); - list_add_tail(&alias->list, list); + if (!perf_pmu_merge_alias(alias, list)) + list_add_tail(&alias->list, list); return 0; } @@ -303,6 +395,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; + /* Remove trailing newline from sysfs file */ + rtrim(buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, NULL, NULL, NULL); } @@ -557,12 +652,6 @@ static int is_arm_pmu_core(const char *name) if (stat(path, &st) == 0) return 1; - /* Look for cpu sysfs (specific to s390) */ - scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s", - sysfs, name); - if (stat(path, &st) == 0 && !strncmp(name, "cpum_", 5)) - return 1; - return 0; } @@ -684,7 +773,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) + if (strcmp(pname, name)) continue; } @@ -841,13 +930,14 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, static __u64 pmu_format_max_value(const unsigned long *format) { - __u64 w = 0; - int fbit; + int w; - for_each_set_bit(fbit, format, PERF_PMU_FORMAT_BITS) - w |= (1ULL << fbit); - - return w; + w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); + if (!w) + return 0; + if (w < 64) + return (1ULL << w) - 1; + return -1; } /* diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index e1dbc9821617..e86f8be89157 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -111,17 +111,6 @@ void exit_probe_symbol_maps(void) symbol__exit(); } -static struct symbol *__find_kernel_function_by_name(const char *name, - struct map **mapp) -{ - return machine__find_kernel_function_by_name(host_machine, name, mapp); -} - -static struct symbol *__find_kernel_function(u64 addr, struct map **mapp) -{ - return machine__find_kernel_function(host_machine, addr, mapp); -} - static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) { /* kmap->ref_reloc_sym should be set if host_machine is initialized */ @@ -149,7 +138,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, if (reloc_sym && strcmp(name, reloc_sym->name) == 0) *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr; else { - sym = __find_kernel_function_by_name(name, &map); + sym = machine__find_kernel_symbol_by_name(host_machine, name, &map); if (!sym) return -ENOENT; *addr = map->unmap_ip(map, sym->start) - @@ -161,8 +150,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr, static struct map *kernel_get_module_map(const char *module) { - struct map_groups *grp = &host_machine->kmaps; - struct maps *maps = &grp->maps[MAP__FUNCTION]; + struct maps *maps = machine__kernel_maps(host_machine); struct map *pos; /* A file path -- this is an offline module */ @@ -177,8 +165,7 @@ static struct map *kernel_get_module_map(const char *module) if (strncmp(pos->dso->short_name + 1, module, pos->dso->short_name_len - 2) == 0 && module[pos->dso->short_name_len - 2] == '\0') { - map__get(pos); - return pos; + return map__get(pos); } } return NULL; @@ -341,7 +328,7 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) char module_name[128]; snprintf(module_name, sizeof(module_name), "[%s]", module); - map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); + map = map_groups__find_by_name(&host_machine->kmaps, module_name); if (map) { dso = map->dso; goto found; @@ -1832,6 +1819,12 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) tp->offset = strtoul(fmt2_str, NULL, 10); } + if (tev->uprobes) { + fmt2_str = strchr(p, '('); + if (fmt2_str) + tp->ref_ctr_offset = strtoul(fmt2_str + 1, NULL, 0); + } + tev->nargs = argc - 2; tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); if (tev->args == NULL) { @@ -2025,6 +2018,22 @@ static int synthesize_probe_trace_arg(struct probe_trace_arg *arg, return err; } +static int +synthesize_uprobe_trace_def(struct probe_trace_event *tev, struct strbuf *buf) +{ + struct probe_trace_point *tp = &tev->point; + int err; + + err = strbuf_addf(buf, "%s:0x%lx", tp->module, tp->address); + + if (err >= 0 && tp->ref_ctr_offset) { + if (!uprobe_ref_ctr_is_supported()) + return -1; + err = strbuf_addf(buf, "(0x%lx)", tp->ref_ctr_offset); + } + return err >= 0 ? 0 : -1; +} + char *synthesize_probe_trace_command(struct probe_trace_event *tev) { struct probe_trace_point *tp = &tev->point; @@ -2054,15 +2063,17 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) } /* Use the tp->address for uprobes */ - if (tev->uprobes) - err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address); - else if (!strncmp(tp->symbol, "0x", 2)) + if (tev->uprobes) { + err = synthesize_uprobe_trace_def(tev, &buf); + } else if (!strncmp(tp->symbol, "0x", 2)) { /* Absolute address. See try_to_find_absolute_address() */ err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "", tp->module ? ":" : "", tp->address); - else + } else { err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "", tp->module ? ":" : "", tp->symbol, tp->offset); + } + if (err) goto error; @@ -2098,7 +2109,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, } if (addr) { addr += tp->offset; - sym = __find_kernel_function(addr, &map); + sym = machine__find_kernel_symbol(host_machine, addr, &map); } } @@ -2646,6 +2657,13 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev) { int i; char *buf = synthesize_probe_trace_command(tev); + struct probe_trace_point *tp = &tev->point; + + if (tp->ref_ctr_offset && !uprobe_ref_ctr_is_supported()) { + pr_warning("A semaphore is associated with %s:%s and " + "seems your kernel doesn't support it.\n", + tev->group, tev->event); + } /* Old uprobe event doesn't support memory dereference */ if (!tev->uprobes || tev->nargs == 0 || !buf) @@ -3504,19 +3522,18 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, (target) ? : "kernel"); goto end; } - if (!dso__sorted_by_name(map->dso, map->type)) - dso__sort_by_name(map->dso, map->type); + if (!dso__sorted_by_name(map->dso)) + dso__sort_by_name(map->dso); /* Show all (filtered) symbols */ setup_pager(); - for (nd = rb_first(&map->dso->symbol_names[map->type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) { struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); if (strfilter__compare(_filter, pos->sym.name)) printf("%s\n", pos->sym.name); - } - + } end: map__put(map); exit_probe_symbol_maps(); diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 45b14f020558..15a98c3a2a2f 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -27,6 +27,7 @@ struct probe_trace_point { char *symbol; /* Base symbol */ char *module; /* Module name */ unsigned long offset; /* Offset from symbol */ + unsigned long ref_ctr_offset; /* SDT reference counter offset */ unsigned long address; /* Actual address of the trace point */ bool retprobe; /* Return probe flag */ }; diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 4ae1123c6794..aac7817d9e14 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -84,8 +84,7 @@ int open_trace_file(const char *trace_file, bool readwrite) char buf[PATH_MAX]; int ret; - ret = e_snprintf(buf, PATH_MAX, "%s/%s", - tracing_path, trace_file); + ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) @@ -697,8 +696,16 @@ out_err: #ifdef HAVE_GELF_GETNOTE_SUPPORT static unsigned long long sdt_note__get_addr(struct sdt_note *note) { - return note->bit32 ? (unsigned long long)note->addr.a32[0] - : (unsigned long long)note->addr.a64[0]; + return note->bit32 ? + (unsigned long long)note->addr.a32[SDT_NOTE_IDX_LOC] : + (unsigned long long)note->addr.a64[SDT_NOTE_IDX_LOC]; +} + +static unsigned long long sdt_note__get_ref_ctr_offset(struct sdt_note *note) +{ + return note->bit32 ? + (unsigned long long)note->addr.a32[SDT_NOTE_IDX_REFCTR] : + (unsigned long long)note->addr.a64[SDT_NOTE_IDX_REFCTR]; } static const char * const type_to_suffix[] = { @@ -776,14 +783,21 @@ static char *synthesize_sdt_probe_command(struct sdt_note *note, { struct strbuf buf; char *ret = NULL, **args; - int i, args_count; + int i, args_count, err; + unsigned long long ref_ctr_offset; if (strbuf_init(&buf, 32) < 0) return NULL; - if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx", - sdtgrp, note->name, pathname, - sdt_note__get_addr(note)) < 0) + err = strbuf_addf(&buf, "p:%s/%s %s:0x%llx", + sdtgrp, note->name, pathname, + sdt_note__get_addr(note)); + + ref_ctr_offset = sdt_note__get_ref_ctr_offset(note); + if (ref_ctr_offset && err >= 0) + err = strbuf_addf(&buf, "(0x%llx)", ref_ctr_offset); + + if (err < 0) goto error; if (!note->args) @@ -999,6 +1013,7 @@ int probe_cache__show_all_caches(struct strfilter *filter) enum ftrace_readme { FTRACE_README_PROBE_TYPE_X = 0, FTRACE_README_KRETPROBE_OFFSET, + FTRACE_README_UPROBE_REF_CTR, FTRACE_README_END, }; @@ -1010,6 +1025,7 @@ static struct { [idx] = {.pattern = pat, .avail = false} DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"), DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"), + DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"), }; static bool scan_ftrace_readme(enum ftrace_readme type) @@ -1065,3 +1081,8 @@ bool kretprobe_offset_is_supported(void) { return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET); } + +bool uprobe_ref_ctr_is_supported(void) +{ + return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR); +} diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 63f29b1d22c1..2a249182f2a6 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -69,6 +69,7 @@ struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache, int probe_cache__show_all_caches(struct strfilter *filter); bool probe_type_is_available(enum probe_type type); bool kretprobe_offset_is_supported(void); +bool uprobe_ref_ctr_is_supported(void); #else /* ! HAVE_LIBELF_SUPPORT */ static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused) { diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 863b61478edd..50150dfc0cdf 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -11,6 +11,7 @@ #include "cpumap.h" #include "print_binary.h" #include "thread_map.h" +#include "mmap.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ @@ -339,36 +340,36 @@ static bool is_tracepoint(struct pyrf_event *pevent) } static PyObject* -tracepoint_field(struct pyrf_event *pe, struct format_field *field) +tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field) { - struct pevent *pevent = field->event->pevent; + struct tep_handle *pevent = field->event->pevent; void *data = pe->sample.raw_data; PyObject *ret = NULL; unsigned long long val; unsigned int offset, len; - if (field->flags & FIELD_IS_ARRAY) { + if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; len = field->size; - if (field->flags & FIELD_IS_DYNAMIC) { - val = pevent_read_number(pevent, data + offset, len); + if (field->flags & TEP_FIELD_IS_DYNAMIC) { + val = tep_read_number(pevent, data + offset, len); offset = val; len = offset >> 16; offset &= 0xffff; } - if (field->flags & FIELD_IS_STRING && + if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { ret = _PyUnicode_FromString((char *)data + offset); } else { ret = PyByteArray_FromStringAndSize((const char *) data + offset, len); - field->flags &= ~FIELD_IS_STRING; + field->flags &= ~TEP_FIELD_IS_STRING; } } else { - val = pevent_read_number(pevent, data + field->offset, - field->size); - if (field->flags & FIELD_IS_POINTER) + val = tep_read_number(pevent, data + field->offset, + field->size); + if (field->flags & TEP_FIELD_IS_POINTER) ret = PyLong_FromUnsignedLong((unsigned long) val); - else if (field->flags & FIELD_IS_SIGNED) + else if (field->flags & TEP_FIELD_IS_SIGNED) ret = PyLong_FromLong((long) val); else ret = PyLong_FromUnsignedLong((unsigned long) val); @@ -382,10 +383,10 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) { const char *str = _PyUnicode_AsString(PyObject_Str(attr_name)); struct perf_evsel *evsel = pevent->evsel; - struct format_field *field; + struct tep_format_field *field; if (!evsel->tp_format) { - struct event_format *tp_format; + struct tep_event_format *tp_format; tp_format = trace_event__tp_format_id(evsel->attr.config); if (!tp_format) @@ -394,7 +395,7 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name) evsel->tp_format = tp_format; } - field = pevent_find_any_field(evsel->tp_format, str); + field = tep_find_any_field(evsel->tp_format, str); if (!field) return NULL; @@ -976,6 +977,20 @@ static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist, return Py_BuildValue("i", evlist->nr_entries); } +static struct perf_mmap *get_md(struct perf_evlist *evlist, int cpu) +{ + int i; + + for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *md = &evlist->mmap[i]; + + if (md->cpu == cpu) + return md; + } + + return NULL; +} + static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, PyObject *args, PyObject *kwargs) { @@ -990,7 +1005,10 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, &cpu, &sample_id_all)) return NULL; - md = &evlist->mmap[cpu]; + md = get_md(evlist, cpu); + if (!md) + return NULL; + if (perf_mmap__read_init(md) < 0) goto end; @@ -1222,7 +1240,7 @@ static struct { static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel, PyObject *args, PyObject *kwargs) { - struct event_format *tp_format; + struct tep_event_format *tp_format; static char *kwlist[] = { "sys", "name", NULL }; char *sys = NULL; char *name = NULL; diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c deleted file mode 100644 index 22eaa201aa27..000000000000 --- a/tools/perf/util/quote.c +++ /dev/null @@ -1,62 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <errno.h> -#include <stdlib.h> -#include "strbuf.h" -#include "quote.h" -#include "util.h" - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * - * E.g. - * original sq_quote result - * name ==> name ==> 'name' - * a b ==> a b ==> 'a b' - * a'b ==> a'\''b ==> 'a'\''b' - * a!b ==> a'\!'b ==> 'a'\!'b' - */ -static inline int need_bs_quote(char c) -{ - return (c == '\'' || c == '!'); -} - -static int sq_quote_buf(struct strbuf *dst, const char *src) -{ - char *to_free = NULL; - int ret; - - if (dst->buf == src) - to_free = strbuf_detach(dst, NULL); - - ret = strbuf_addch(dst, '\''); - while (!ret && *src) { - size_t len = strcspn(src, "'!"); - ret = strbuf_add(dst, src, len); - src += len; - while (!ret && need_bs_quote(*src)) - ret = strbuf_addf(dst, "'\\%c\'", *src++); - } - if (!ret) - ret = strbuf_addch(dst, '\''); - free(to_free); - - return ret; -} - -int sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) -{ - int i, ret; - - /* Copy into destination buffer. */ - ret = strbuf_grow(dst, 255); - for (i = 0; !ret && argv[i]; ++i) { - ret = strbuf_addch(dst, ' '); - if (ret) - break; - ret = sq_quote_buf(dst, argv[i]); - if (maxlen && dst->len > maxlen) - return -ENOSPC; - } - return ret; -} diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h deleted file mode 100644 index 274bf26d3511..000000000000 --- a/tools/perf/util/quote.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_QUOTE_H -#define __PERF_QUOTE_H - -#include <stddef.h> - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * single quote pair. - * - * For example, if you are passing the result to system() as an - * argument: - * - * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) - * - * would be appropriate. If the system() is going to call ssh to - * run the command on the other side: - * - * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); - * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd)); - * - * Note that the above examples leak memory! Remember to free result from - * sq_quote() in a real application. - */ - -struct strbuf; - -int sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); - -#endif /* __PERF_QUOTE_H */ diff --git a/tools/perf/util/s390-cpumsf-kernel.h b/tools/perf/util/s390-cpumsf-kernel.h new file mode 100644 index 000000000000..de8c7ad0eca8 --- /dev/null +++ b/tools/perf/util/s390-cpumsf-kernel.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Auxtrace support for s390 CPU measurement sampling facility + * + * Copyright IBM Corp. 2018 + * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> + * Thomas Richter <tmricht@linux.ibm.com> + */ +#ifndef S390_CPUMSF_KERNEL_H +#define S390_CPUMSF_KERNEL_H + +#define S390_CPUMSF_PAGESZ 4096 /* Size of sample block units */ +#define S390_CPUMSF_DIAG_DEF_FIRST 0x8001 /* Diagnostic entry lowest id */ + +struct hws_basic_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:4; /* 16-19 reserved */ + unsigned int U:4; /* 20-23 Number of unique instruct. */ + unsigned int z:2; /* zeros */ + unsigned int T:1; /* 26 PSW DAT mode */ + unsigned int W:1; /* 27 PSW wait state */ + unsigned int P:1; /* 28 PSW Problem state */ + unsigned int AS:2; /* 29-30 PSW address-space control */ + unsigned int I:1; /* 31 entry valid or invalid */ + unsigned int CL:2; /* 32-33 Configuration Level */ + unsigned int:14; + unsigned int prim_asn:16; /* primary ASN */ + unsigned long long ia; /* Instruction Address */ + unsigned long long gpp; /* Guest Program Parameter */ + unsigned long long hpp; /* Host Program Parameter */ +}; + +struct hws_diag_entry { + unsigned int def:16; /* 0-15 Data Entry Format */ + unsigned int R:15; /* 16-19 and 20-30 reserved */ + unsigned int I:1; /* 31 entry valid or invalid */ + u8 data[]; /* Machine-dependent sample data */ +}; + +struct hws_combined_entry { + struct hws_basic_entry basic; /* Basic-sampling data entry */ + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ +}; + +struct hws_trailer_entry { + union { + struct { + unsigned int f:1; /* 0 - Block Full Indicator */ + unsigned int a:1; /* 1 - Alert request control */ + unsigned int t:1; /* 2 - Timestamp format */ + unsigned int:29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ + }; + unsigned long long flags; /* 0 - 64: All indicators */ + }; + unsigned long long overflow; /* 64 - sample Overflow count */ + unsigned char timestamp[16]; /* 16 - 31 timestamp */ + unsigned long long reserved1; /* 32 -Reserved */ + unsigned long long reserved2; /* */ + union { /* 48 - reserved for programming use */ + struct { + unsigned long long clock_base:1; /* in progusage2 */ + unsigned long long progusage1:63; + unsigned long long progusage2; + }; + unsigned long long progusage[2]; + }; +}; + +#endif diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c new file mode 100644 index 000000000000..a2eeebbfb25f --- /dev/null +++ b/tools/perf/util/s390-cpumsf.c @@ -0,0 +1,1033 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2018 + * Auxtrace support for s390 CPU-Measurement Sampling Facility + * + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + * + * Auxiliary traces are collected during 'perf record' using rbd000 event. + * Several PERF_RECORD_XXX are generated during recording: + * + * PERF_RECORD_AUX: + * Records that new data landed in the AUX buffer part. + * PERF_RECORD_AUXTRACE: + * Defines auxtrace data. Followed by the actual data. The contents of + * the auxtrace data is dependent on the event and the CPU. + * This record is generated by perf record command. For details + * see Documentation/perf.data-file-format.txt. + * PERF_RECORD_AUXTRACE_INFO: + * Defines a table of contains for PERF_RECORD_AUXTRACE records. This + * record is generated during 'perf record' command. Each record contains up + * to 256 entries describing offset and size of the AUXTRACE data in the + * perf.data file. + * PERF_RECORD_AUXTRACE_ERROR: + * Indicates an error during AUXTRACE collection such as buffer overflow. + * PERF_RECORD_FINISHED_ROUND: + * Perf events are not necessarily in time stamp order, as they can be + * collected in parallel on different CPUs. If the events should be + * processed in time order they need to be sorted first. + * Perf report guarantees that there is no reordering over a + * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a + * time stamp lower than this record are processed (and displayed) before + * the succeeding perf record are processed. + * + * These records are evaluated during perf report command. + * + * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for + * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info() + * below. + * Auxiliary trace data is collected per CPU. To merge the data into the report + * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace + * data is in ascending order. + * + * Each queue has a double linked list of auxtrace_buffers. This list contains + * the offset and size of a CPU's auxtrace data. During auxtrace processing + * the data portion is mmap()'ed. + * + * To sort the queues in chronological order, all queue access is controlled + * by the auxtrace_heap. This is basicly a stack, each stack element has two + * entries, the queue number and a time stamp. However the stack is sorted by + * the time stamps. The highest time stamp is at the bottom the lowest + * (nearest) time stamp is at the top. That sort order is maintained at all + * times! + * + * After the auxtrace infrastructure has been setup, the auxtrace queues are + * filled with data (offset/size pairs) and the auxtrace_heap is populated. + * + * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues. + * Each record is handled by s390_cpumsf_process_event(). The time stamp of + * the perf record is compared with the time stamp located on the auxtrace_heap + * top element. If that time stamp is lower than the time stamp from the + * record sample, the auxtrace queues will be processed. As auxtrace queues + * control many auxtrace_buffers and each buffer can be quite large, the + * auxtrace buffer might be processed only partially. In this case the + * position in the auxtrace_buffer of that queue is remembered and the time + * stamp of the last processed entry of the auxtrace_buffer replaces the + * current auxtrace_heap top. + * + * 3. Auxtrace_queues might run of out data and are feeded by the + * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event(). + * + * Event Generation + * Each sampling-data entry in the auxilary trace data generates a perf sample. + * This sample is filled + * with data from the auxtrace such as PID/TID, instruction address, CPU state, + * etc. This sample is processed with perf_session__deliver_synth_event() to + * be included into the GUI. + * + * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining + * auxiliary traces entries until the time stamp of this record is reached + * auxtrace_heap top. This is triggered by ordered_event->deliver(). + * + * + * Perf event processing. + * Event processing of PERF_RECORD_XXX entries relies on time stamp entries. + * This is the function call sequence: + * + * __cmd_report() + * | + * perf_session__process_events() + * | + * __perf_session__process_events() + * | + * perf_session__process_event() + * | This functions splits the PERF_RECORD_XXX records. + * | - Those generated by perf record command (type number equal or higher + * | than PERF_RECORD_USER_TYPE_START) are handled by + * | perf_session__process_user_event(see below) + * | - Those generated by the kernel are handled by + * | perf_evlist__parse_sample_timestamp() + * | + * perf_evlist__parse_sample_timestamp() + * | Extract time stamp from sample data. + * | + * perf_session__queue_event() + * | If timestamp is positive the sample is entered into an ordered_event + * | list, sort order is the timestamp. The event processing is deferred until + * | later (see perf_session__process_user_event()). + * | Other timestamps (0 or -1) are handled immediately by + * | perf_session__deliver_event(). These are events generated at start up + * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP* + * | records. They are needed to create a list of running processes and its + * | memory mappings and layout. They are needed at the beginning to enable + * | command perf report to create process trees and memory mappings. + * | + * perf_session__deliver_event() + * | Delivers a PERF_RECORD_XXX entry for handling. + * | + * auxtrace__process_event() + * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with + * | time stamps from the auxiliary trace buffers. This enables + * | synchronization between auxiliary trace data and the events on the + * | perf.data file. + * | + * machine__deliver_event() + * | Handles the PERF_RECORD_XXX event. This depends on the record type. + * It might update the process tree, update a process memory map or enter + * a sample with IP and call back chain data into GUI data pool. + * + * + * Deferred processing determined by perf_session__process_user_event() is + * finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These + * are generated during command perf record. + * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all + * PERF_RECORD_XXX entries stored in the ordered_event list. This list was + * built up while reading the perf.data file. + * Each event is now processed by calling perf_session__deliver_event(). + * This enables time synchronization between the data in the perf.data file and + * the data in the auxiliary trace buffers. + */ + +#include <endian.h> +#include <errno.h> +#include <byteswap.h> +#include <inttypes.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include <sys/stat.h> +#include <sys/types.h> + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "debug.h" +#include "auxtrace.h" +#include "s390-cpumsf.h" +#include "s390-cpumsf-kernel.h" +#include "config.h" + +struct s390_cpumsf { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + struct perf_session *session; + struct machine *machine; + u32 auxtrace_type; + u32 pmu_type; + u16 machine_type; + bool data_queued; + bool use_logfile; + char *logdir; +}; + +struct s390_cpumsf_queue { + struct s390_cpumsf *sf; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + int cpu; + FILE *logfile; +}; + +/* Display s390 CPU measurement facility basic-sampling data entry */ +static bool s390_cpumsf_basic_show(const char *color, size_t pos, + struct hws_basic_entry *basic) +{ + if (basic->def != 1) { + pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); + return false; + } + color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x" + " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n" + "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n", + pos, basic->def, basic->U, + basic->T ? 'T' : ' ', + basic->W ? 'W' : ' ', + basic->P ? 'P' : ' ', + basic->I ? 'I' : ' ', + basic->AS, basic->prim_asn, basic->ia, basic->CL, + basic->hpp, basic->gpp); + return true; +} + +/* Display s390 CPU measurement facility diagnostic-sampling data entry */ +static bool s390_cpumsf_diag_show(const char *color, size_t pos, + struct hws_diag_entry *diag) +{ + if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { + pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); + return false; + } + color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n", + pos, diag->def, diag->I ? 'I' : ' '); + return true; +} + +/* Return TOD timestamp contained in an trailer entry */ +static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* te->t set: TOD in STCKE format, bytes 8-15 + * to->t not set: TOD in STCK format, bytes 0-7 + */ + unsigned long long ts; + + memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); + return ts; +} + +/* Display s390 CPU measurement facility trailer entry */ +static bool s390_cpumsf_trailer_show(const char *color, size_t pos, + struct hws_trailer_entry *te) +{ + if (te->bsdes != sizeof(struct hws_basic_entry)) { + pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); + return false; + } + color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" + " dsdes:%d Overflow:%lld Time:%#llx\n" + "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", + pos, + te->f ? 'F' : ' ', + te->a ? 'A' : ' ', + te->t ? 'T' : ' ', + te->bsdes, te->dsdes, te->overflow, + trailer_timestamp(te), te->clock_base, te->progusage2, + te->progusage[0], te->progusage[1]); + return true; +} + +/* Test a sample data block. It must be 4KB or a multiple thereof in size and + * 4KB page aligned. Each sample data page has a trailer entry at the + * end which contains the sample entry data sizes. + * + * Return true if the sample data block passes the checks and set the + * basic set entry size and diagnostic set entry size. + * + * Return false on failure. + * + * Note: Old hardware does not set the basic or diagnostic entry sizes + * in the trailer entry. Use the type number instead. + */ +static bool s390_cpumsf_validate(int machine_type, + unsigned char *buf, size_t len, + unsigned short *bsdes, + unsigned short *dsdes) +{ + struct hws_basic_entry *basic = (struct hws_basic_entry *)buf; + struct hws_trailer_entry *te; + + *dsdes = *bsdes = 0; + if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ + return false; + if (basic->def != 1) /* No basic set entry, must be first */ + return false; + /* Check for trailer entry at end of SDB */ + te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ + - sizeof(*te)); + *bsdes = te->bsdes; + *dsdes = te->dsdes; + if (!te->bsdes && !te->dsdes) { + /* Very old hardware, use CPUID */ + switch (machine_type) { + case 2097: + case 2098: + *dsdes = 64; + *bsdes = 32; + break; + case 2817: + case 2818: + *dsdes = 74; + *bsdes = 32; + break; + case 2827: + case 2828: + *dsdes = 85; + *bsdes = 32; + break; + default: + /* Illegal trailer entry */ + return false; + } + } + return true; +} + +/* Return true if there is room for another entry */ +static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos) +{ + size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry); + + if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz) + return false; + return true; +} + +/* Dump an auxiliary buffer. These buffers are multiple of + * 4KB SDB pages. + */ +static void s390_cpumsf_dump(struct s390_cpumsf *sf, + unsigned char *buf, size_t len) +{ + const char *color = PERF_COLOR_BLUE; + struct hws_basic_entry *basic; + struct hws_diag_entry *diag; + unsigned short bsdes, dsdes; + size_t pos = 0; + + color_fprintf(stdout, color, + ". ... s390 AUX data: size %zu bytes\n", + len); + + if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, + &dsdes)) { + pr_err("Invalid AUX trace data block size:%zu" + " (type:%d bsdes:%hd dsdes:%hd)\n", + len, sf->machine_type, bsdes, dsdes); + return; + } + + /* s390 kernel always returns 4KB blocks fully occupied, + * no partially filled SDBs. + */ + while (pos < len) { + /* Handle Basic entry */ + basic = (struct hws_basic_entry *)(buf + pos); + if (s390_cpumsf_basic_show(color, pos, basic)) + pos += bsdes; + else + return; + + /* Handle Diagnostic entry */ + diag = (struct hws_diag_entry *)(buf + pos); + if (s390_cpumsf_diag_show(color, pos, diag)) + pos += dsdes; + else + return; + + /* Check for trailer entry */ + if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { + /* Show trailer entry */ + struct hws_trailer_entry te; + + pos = (pos + S390_CPUMSF_PAGESZ) + & ~(S390_CPUMSF_PAGESZ - 1); + pos -= sizeof(te); + memcpy(&te, buf + pos, sizeof(te)); + /* Set descriptor sizes in case of old hardware + * where these values are not set. + */ + te.bsdes = bsdes; + te.dsdes = dsdes; + if (s390_cpumsf_trailer_show(color, pos, &te)) + pos += sizeof(te); + else + return; + } + } +} + +static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf, + size_t len) +{ + printf(".\n"); + s390_cpumsf_dump(sf, buf, len); +} + +#define S390_LPP_PID_MASK 0xffffffff + +static bool s390_cpumsf_make_event(size_t pos, + struct hws_basic_entry *basic, + struct s390_cpumsf_queue *sfq) +{ + struct perf_sample sample = { + .ip = basic->ia, + .pid = basic->hpp & S390_LPP_PID_MASK, + .tid = basic->hpp & S390_LPP_PID_MASK, + .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN, + .cpu = sfq->cpu, + .period = 1 + }; + union perf_event event; + + memset(&event, 0, sizeof(event)); + if (basic->CL == 1) /* Native LPAR mode */ + sample.cpumode = basic->P ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + else if (basic->CL == 2) /* Guest kernel/user space */ + sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else if (basic->gpp || basic->prim_asn != 0xffff) + /* Use heuristics on old hardware */ + sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER + : PERF_RECORD_MISC_GUEST_KERNEL; + else + sample.cpumode = basic->P ? PERF_RECORD_MISC_USER + : PERF_RECORD_MISC_KERNEL; + + event.sample.header.type = PERF_RECORD_SAMPLE; + event.sample.header.misc = sample.cpumode; + event.sample.header.size = sizeof(struct perf_event_header); + + pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n", + __func__, pos, sample.ip, basic->P, basic->CL, sample.pid, + sample.tid, sample.cpumode, sample.cpu); + if (perf_session__deliver_synth_event(sfq->sf->session, &event, + &sample)) { + pr_err("s390 Auxiliary Trace: failed to deliver event\n"); + return false; + } + return true; +} + +static unsigned long long get_trailer_time(const unsigned char *buf) +{ + struct hws_trailer_entry *te; + unsigned long long aux_time; + + te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ + - sizeof(*te)); + + if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ + return 0; + + /* Correct calculation to convert time stamp in trailer entry to + * nano seconds (taken from arch/s390 function tod_to_ns()). + * TOD_CLOCK_BASE is stored in trailer entry member progusage2. + */ + aux_time = trailer_timestamp(te) - te->progusage2; + aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); + return aux_time; +} + +/* Process the data samples of a single queue. The first parameter is a + * pointer to the queue, the second parameter is the time stamp. This + * is the time stamp: + * - of the event that triggered this processing. + * - or the time stamp when the last proccesing of this queue stopped. + * In this case it stopped at a 4KB page boundary and record the + * position on where to continue processing on the next invocation + * (see buffer->use_data and buffer->use_size). + * + * When this function returns the second parameter is updated to + * reflect the time stamp of the last processed auxiliary data entry + * (taken from the trailer entry of that page). The caller uses this + * returned time stamp to record the last processed entry in this + * queue. + * + * The function returns: + * 0: Processing successful. The second parameter returns the + * time stamp from the trailer entry until which position + * processing took place. Subsequent calls resume from this + * position. + * <0: An error occurred during processing. The second parameter + * returns the maximum time stamp. + * >0: Done on this queue. The second parameter returns the + * maximum time stamp. + */ +static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts) +{ + struct s390_cpumsf *sf = sfq->sf; + unsigned char *buf = sfq->buffer->use_data; + size_t len = sfq->buffer->use_size; + struct hws_basic_entry *basic; + unsigned short bsdes, dsdes; + size_t pos = 0; + int err = 1; + u64 aux_ts; + + if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes, + &dsdes)) { + *ts = ~0ULL; + return -1; + } + + /* Get trailer entry time stamp and check if entries in + * this auxiliary page are ready for processing. If the + * time stamp of the first entry is too high, whole buffer + * can be skipped. In this case return time stamp. + */ + aux_ts = get_trailer_time(buf); + if (!aux_ts) { + pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n", + sfq->buffer->data_offset); + aux_ts = ~0ULL; + goto out; + } + if (aux_ts > *ts) { + *ts = aux_ts; + return 0; + } + + while (pos < len) { + /* Handle Basic entry */ + basic = (struct hws_basic_entry *)(buf + pos); + if (s390_cpumsf_make_event(pos, basic, sfq)) + pos += bsdes; + else { + err = -EBADF; + goto out; + } + + pos += dsdes; /* Skip diagnositic entry */ + + /* Check for trailer entry */ + if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) { + pos = (pos + S390_CPUMSF_PAGESZ) + & ~(S390_CPUMSF_PAGESZ - 1); + /* Check existence of next page */ + if (pos >= len) + break; + aux_ts = get_trailer_time(buf + pos); + if (!aux_ts) { + aux_ts = ~0ULL; + goto out; + } + if (aux_ts > *ts) { + *ts = aux_ts; + sfq->buffer->use_data += pos; + sfq->buffer->use_size -= pos; + return 0; + } + } + } +out: + *ts = aux_ts; + sfq->buffer->use_size = 0; + sfq->buffer->use_data = NULL; + return err; /* Buffer completely scanned or error */ +} + +/* Run the s390 auxiliary trace decoder. + * Select the queue buffer to operate on, the caller already selected + * the proper queue, depending on second parameter 'ts'. + * This is the time stamp until which the auxiliary entries should + * be processed. This value is updated by called functions and + * returned to the caller. + * + * Resume processing in the current buffer. If there is no buffer + * get a new buffer from the queue and setup start position for + * processing. + * When a buffer is completely processed remove it from the queue + * before returning. + * + * This function returns + * 1: When the queue is empty. Second parameter will be set to + * maximum time stamp. + * 0: Normal processing done. + * <0: Error during queue buffer setup. This causes the caller + * to stop processing completely. + */ +static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq, + u64 *ts) +{ + + struct auxtrace_buffer *buffer; + struct auxtrace_queue *queue; + int err; + + queue = &sfq->sf->queues.queue_array[sfq->queue_nr]; + + /* Get buffer and last position in buffer to resume + * decoding the auxiliary entries. One buffer might be large + * and decoding might stop in between. This depends on the time + * stamp of the trailer entry in each page of the auxiliary + * data and the time stamp of the event triggering the decoding. + */ + if (sfq->buffer == NULL) { + sfq->buffer = buffer = auxtrace_buffer__next(queue, + sfq->buffer); + if (!buffer) { + *ts = ~0ULL; + return 1; /* Processing done on this queue */ + } + /* Start with a new buffer on this queue */ + if (buffer->data) { + buffer->use_size = buffer->size; + buffer->use_data = buffer->data; + } + if (sfq->logfile) { /* Write into log file */ + size_t rc = fwrite(buffer->data, buffer->size, 1, + sfq->logfile); + if (rc != 1) + pr_err("Failed to write auxiliary data\n"); + } + } else + buffer = sfq->buffer; + + if (!buffer->data) { + int fd = perf_data__fd(sfq->sf->session->data); + + buffer->data = auxtrace_buffer__get_data(buffer, fd); + if (!buffer->data) + return -ENOMEM; + buffer->use_size = buffer->size; + buffer->use_data = buffer->data; + + if (sfq->logfile) { /* Write into log file */ + size_t rc = fwrite(buffer->data, buffer->size, 1, + sfq->logfile); + if (rc != 1) + pr_err("Failed to write auxiliary data\n"); + } + } + pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n", + __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset, + buffer->size, buffer->use_size); + err = s390_cpumsf_samples(sfq, ts); + + /* If non-zero, there is either an error (err < 0) or the buffer is + * completely done (err > 0). The error is unrecoverable, usually + * some descriptors could not be read successfully, so continue with + * the next buffer. + * In both cases the parameter 'ts' has been updated. + */ + if (err) { + sfq->buffer = NULL; + list_del(&buffer->list); + auxtrace_buffer__free(buffer); + if (err > 0) /* Buffer done, no error */ + err = 0; + } + return err; +} + +static struct s390_cpumsf_queue * +s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr) +{ + struct s390_cpumsf_queue *sfq; + + sfq = zalloc(sizeof(struct s390_cpumsf_queue)); + if (sfq == NULL) + return NULL; + + sfq->sf = sf; + sfq->queue_nr = queue_nr; + sfq->cpu = -1; + if (sf->use_logfile) { + char *name; + int rc; + + rc = (sf->logdir) + ? asprintf(&name, "%s/aux.smp.%02x", + sf->logdir, queue_nr) + : asprintf(&name, "aux.smp.%02x", queue_nr); + if (rc > 0) + sfq->logfile = fopen(name, "w"); + if (sfq->logfile == NULL) { + pr_err("Failed to open auxiliary log file %s," + "continue...\n", name); + sf->use_logfile = false; + } + free(name); + } + return sfq; +} + +static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf, + struct auxtrace_queue *queue, + unsigned int queue_nr, u64 ts) +{ + struct s390_cpumsf_queue *sfq = queue->priv; + + if (list_empty(&queue->head)) + return 0; + + if (sfq == NULL) { + sfq = s390_cpumsf_alloc_queue(sf, queue_nr); + if (!sfq) + return -ENOMEM; + queue->priv = sfq; + + if (queue->cpu != -1) + sfq->cpu = queue->cpu; + } + return auxtrace_heap__add(&sf->heap, queue_nr, ts); +} + +static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts) +{ + unsigned int i; + int ret = 0; + + for (i = 0; i < sf->queues.nr_queues; i++) { + ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i], + i, ts); + if (ret) + break; + } + return ret; +} + +static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts) +{ + if (!sf->queues.new_data) + return 0; + + sf->queues.new_data = false; + return s390_cpumsf_setup_queues(sf, ts); +} + +static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) +{ + unsigned int queue_nr; + u64 ts; + int ret; + + while (1) { + struct auxtrace_queue *queue; + struct s390_cpumsf_queue *sfq; + + if (!sf->heap.heap_cnt) + return 0; + + if (sf->heap.heap_array[0].ordinal >= timestamp) + return 0; + + queue_nr = sf->heap.heap_array[0].queue_nr; + queue = &sf->queues.queue_array[queue_nr]; + sfq = queue->priv; + + auxtrace_heap__pop(&sf->heap); + if (sf->heap.heap_cnt) { + ts = sf->heap.heap_array[0].ordinal + 1; + if (ts > timestamp) + ts = timestamp; + } else { + ts = timestamp; + } + + ret = s390_cpumsf_run_decoder(sfq, &ts); + if (ret < 0) { + auxtrace_heap__add(&sf->heap, queue_nr, ts); + return ret; + } + if (!ret) { + ret = auxtrace_heap__add(&sf->heap, queue_nr, ts); + if (ret < 0) + return ret; + } + } + return 0; +} + +static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, + pid_t pid, pid_t tid, u64 ip) +{ + char msg[MAX_AUXTRACE_ERROR_MSG]; + union perf_event event; + int err; + + strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); + auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, + code, cpu, pid, tid, ip, msg); + + err = perf_session__deliver_synth_event(sf->session, &event, NULL); + if (err) + pr_err("s390 Auxiliary Trace: failed to deliver error event," + "error %d\n", err); + return err; +} + +static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) +{ + return s390_cpumsf_synth_error(sf, 1, sample->cpu, + sample->pid, sample->tid, 0); +} + +static int +s390_cpumsf_process_event(struct perf_session *session __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + struct s390_cpumsf *sf = container_of(session->auxtrace, + struct s390_cpumsf, + auxtrace); + u64 timestamp = sample->time; + int err = 0; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("s390 Auxiliary Trace requires ordered events\n"); + return -EINVAL; + } + + if (event->header.type == PERF_RECORD_AUX && + event->aux.flags & PERF_AUX_FLAG_TRUNCATED) + return s390_cpumsf_lost(sf, sample); + + if (timestamp) { + err = s390_cpumsf_update_queues(sf, timestamp); + if (!err) + err = s390_cpumsf_process_queues(sf, timestamp); + } + return err; +} + +struct s390_cpumsf_synth { + struct perf_tool cpumsf_tool; + struct perf_session *session; +}; + +static int +s390_cpumsf_process_auxtrace_event(struct perf_session *session, + union perf_event *event __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + struct s390_cpumsf *sf = container_of(session->auxtrace, + struct s390_cpumsf, + auxtrace); + + int fd = perf_data__fd(session->data); + struct auxtrace_buffer *buffer; + off_t data_offset; + int err; + + if (sf->data_queued) + return 0; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&sf->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here after copying piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + s390_cpumsf_dump_event(sf, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + return 0; +} + +static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused) +{ +} + +static int s390_cpumsf_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static void s390_cpumsf_free_queues(struct perf_session *session) +{ + struct s390_cpumsf *sf = container_of(session->auxtrace, + struct s390_cpumsf, + auxtrace); + struct auxtrace_queues *queues = &sf->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) + queues->queue_array[i].priv; + + if (sfq != NULL && sfq->logfile) { + fclose(sfq->logfile); + sfq->logfile = NULL; + } + zfree(&queues->queue_array[i].priv); + } + auxtrace_queues__free(queues); +} + +static void s390_cpumsf_free(struct perf_session *session) +{ + struct s390_cpumsf *sf = container_of(session->auxtrace, + struct s390_cpumsf, + auxtrace); + + auxtrace_heap__free(&sf->heap); + s390_cpumsf_free_queues(session); + session->auxtrace = NULL; + free(sf->logdir); + free(sf); +} + +static int s390_cpumsf_get_type(const char *cpuid) +{ + int ret, family = 0; + + ret = sscanf(cpuid, "%*[^,],%u", &family); + return (ret == 1) ? family : 0; +} + +/* Check itrace options set on perf report command. + * Return true, if none are set or all options specified can be + * handled on s390 (currently only option 'd' for logging. + * Return false otherwise. + */ +static bool check_auxtrace_itrace(struct itrace_synth_opts *itops) +{ + bool ison = false; + + if (!itops || !itops->set) + return true; + ison = itops->inject || itops->instructions || itops->branches || + itops->transactions || itops->ptwrites || + itops->pwr_events || itops->errors || + itops->dont_decode || itops->calls || itops->returns || + itops->callchain || itops->thread_stack || + itops->last_branch; + if (!ison) + return true; + pr_err("Unsupported --itrace options specified\n"); + return false; +} + +/* Check for AUXTRACE dump directory if it is needed. + * On failure print an error message but continue. + * Return 0 on wrong keyword in config file and 1 otherwise. + */ +static int s390_cpumsf__config(const char *var, const char *value, void *cb) +{ + struct s390_cpumsf *sf = cb; + struct stat stbuf; + int rc; + + if (strcmp(var, "auxtrace.dumpdir")) + return 0; + sf->logdir = strdup(value); + if (sf->logdir == NULL) { + pr_err("Failed to find auxtrace log directory %s," + " continue with current directory...\n", value); + return 1; + } + rc = stat(sf->logdir, &stbuf); + if (rc == -1 || !S_ISDIR(stbuf.st_mode)) { + pr_err("Missing auxtrace log directory %s," + " continue with current directory...\n", value); + free(sf->logdir); + sf->logdir = NULL; + } + return 1; +} + +int s390_cpumsf_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + struct s390_cpumsf *sf; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event)) + return -EINVAL; + + sf = zalloc(sizeof(struct s390_cpumsf)); + if (sf == NULL) + return -ENOMEM; + + if (!check_auxtrace_itrace(session->itrace_synth_opts)) { + err = -EINVAL; + goto err_free; + } + sf->use_logfile = session->itrace_synth_opts->log; + if (sf->use_logfile) + perf_config(s390_cpumsf__config, sf); + + err = auxtrace_queues__init(&sf->queues); + if (err) + goto err_free; + + sf->session = session; + sf->machine = &session->machines.host; /* No kvm support */ + sf->auxtrace_type = auxtrace_info->type; + sf->pmu_type = PERF_TYPE_RAW; + sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid); + + sf->auxtrace.process_event = s390_cpumsf_process_event; + sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event; + sf->auxtrace.flush_events = s390_cpumsf_flush; + sf->auxtrace.free_events = s390_cpumsf_free_events; + sf->auxtrace.free = s390_cpumsf_free; + session->auxtrace = &sf->auxtrace; + + if (dump_trace) + return 0; + + err = auxtrace_queues__process_index(&sf->queues, session); + if (err) + goto err_free_queues; + + if (sf->queues.populated) + sf->data_queued = true; + + return 0; + +err_free_queues: + auxtrace_queues__free(&sf->queues); + session->auxtrace = NULL; +err_free: + free(sf->logdir); + free(sf); + return err; +} diff --git a/tools/perf/util/s390-cpumsf.h b/tools/perf/util/s390-cpumsf.h new file mode 100644 index 000000000000..fb64d100555c --- /dev/null +++ b/tools/perf/util/s390-cpumsf.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2018 + * Auxtrace support for s390 CPU-Measurement Sampling Facility + * + * Author(s): Thomas Richter <tmricht@linux.ibm.com> + */ + +#ifndef INCLUDE__PERF_S390_CPUMSF_H +#define INCLUDE__PERF_S390_CPUMSF_H + +union perf_event; +struct perf_session; +struct perf_pmu; + +struct auxtrace_record * +s390_cpumsf_recording_init(int *err, struct perf_pmu *s390_cpumsf_pmu); + +int s390_cpumsf_process_auxtrace_info(union perf_event *event, + struct perf_session *session); +#endif diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 7b79c413486b..89cb887648f9 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -99,7 +99,7 @@ static void define_symbolic_value(const char *ev_name, LEAVE; } -static void define_symbolic_values(struct print_flag_sym *field, +static void define_symbolic_values(struct tep_print_flag_sym *field, const char *ev_name, const char *field_name) { @@ -157,7 +157,7 @@ static void define_flag_value(const char *ev_name, LEAVE; } -static void define_flag_values(struct print_flag_sym *field, +static void define_flag_values(struct tep_print_flag_sym *field, const char *ev_name, const char *field_name) { @@ -189,62 +189,62 @@ static void define_flag_field(const char *ev_name, LEAVE; } -static void define_event_symbols(struct event_format *event, +static void define_event_symbols(struct tep_event_format *event, const char *ev_name, - struct print_arg *args) + struct tep_print_arg *args) { if (args == NULL) return; switch (args->type) { - case PRINT_NULL: + case TEP_PRINT_NULL: break; - case PRINT_ATOM: + case TEP_PRINT_ATOM: define_flag_value(ev_name, cur_field_name, "0", args->atom.atom); zero_flag_atom = 0; break; - case PRINT_FIELD: + case TEP_PRINT_FIELD: free(cur_field_name); cur_field_name = strdup(args->field.name); break; - case PRINT_FLAGS: + case TEP_PRINT_FLAGS: define_event_symbols(event, ev_name, args->flags.field); define_flag_field(ev_name, cur_field_name, args->flags.delim); define_flag_values(args->flags.flags, ev_name, cur_field_name); break; - case PRINT_SYMBOL: + case TEP_PRINT_SYMBOL: define_event_symbols(event, ev_name, args->symbol.field); define_symbolic_field(ev_name, cur_field_name); define_symbolic_values(args->symbol.symbols, ev_name, cur_field_name); break; - case PRINT_HEX: - case PRINT_HEX_STR: + case TEP_PRINT_HEX: + case TEP_PRINT_HEX_STR: define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; - case PRINT_INT_ARRAY: + case TEP_PRINT_INT_ARRAY: define_event_symbols(event, ev_name, args->int_array.field); define_event_symbols(event, ev_name, args->int_array.count); define_event_symbols(event, ev_name, args->int_array.el_size); break; - case PRINT_BSTRING: - case PRINT_DYNAMIC_ARRAY: - case PRINT_DYNAMIC_ARRAY_LEN: - case PRINT_STRING: - case PRINT_BITMASK: + case TEP_PRINT_BSTRING: + case TEP_PRINT_DYNAMIC_ARRAY: + case TEP_PRINT_DYNAMIC_ARRAY_LEN: + case TEP_PRINT_STRING: + case TEP_PRINT_BITMASK: break; - case PRINT_TYPE: + case TEP_PRINT_TYPE: define_event_symbols(event, ev_name, args->typecast.item); break; - case PRINT_OP: + case TEP_PRINT_OP: if (strcmp(args->op.op, ":") == 0) zero_flag_atom = 1; define_event_symbols(event, ev_name, args->op.left); define_event_symbols(event, ev_name, args->op.right); break; - case PRINT_FUNC: + case TEP_PRINT_FUNC: default: pr_err("Unsupported print arg type\n"); /* we should warn... */ @@ -338,8 +338,8 @@ static void perl_process_tracepoint(struct perf_sample *sample, struct addr_location *al) { struct thread *thread = al->thread; - struct event_format *event = evsel->tp_format; - struct format_field *field; + struct tep_event_format *event = evsel->tp_format; + struct tep_format_field *field; static char handler[256]; unsigned long long val; unsigned long s, ns; @@ -388,9 +388,9 @@ static void perl_process_tracepoint(struct perf_sample *sample, /* common fields other than pid can be accessed via xsub fns */ for (field = event->format.fields; field; field = field->next) { - if (field->flags & FIELD_IS_STRING) { + if (field->flags & TEP_FIELD_IS_STRING) { int offset; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { offset = *(int *)(data + field->offset); offset &= 0xffff; } else @@ -399,7 +399,7 @@ static void perl_process_tracepoint(struct perf_sample *sample, } else { /* FIELD_IS_NUMERIC */ val = read_size(event, data + field->offset, field->size); - if (field->flags & FIELD_IS_SIGNED) { + if (field->flags & TEP_FIELD_IS_SIGNED) { XPUSHs(sv_2mortal(newSViv(val))); } else { XPUSHs(sv_2mortal(newSVuv(val))); @@ -535,10 +535,10 @@ static int perl_stop_script(void) return 0; } -static int perl_generate_script(struct pevent *pevent, const char *outfile) +static int perl_generate_script(struct tep_handle *pevent, const char *outfile) { - struct event_format *event = NULL; - struct format_field *f; + struct tep_event_format *event = NULL; + struct tep_format_field *f; char fname[PATH_MAX]; int not_first, count; FILE *ofp; @@ -646,11 +646,11 @@ sub print_backtrace\n\ count++; fprintf(ofp, "%s=", f->name); - if (f->flags & FIELD_IS_STRING || - f->flags & FIELD_IS_FLAG || - f->flags & FIELD_IS_SYMBOLIC) + if (f->flags & TEP_FIELD_IS_STRING || + f->flags & TEP_FIELD_IS_FLAG || + f->flags & TEP_FIELD_IS_SYMBOLIC) fprintf(ofp, "%%s"); - else if (f->flags & FIELD_IS_SIGNED) + else if (f->flags & TEP_FIELD_IS_SIGNED) fprintf(ofp, "%%d"); else fprintf(ofp, "%%u"); @@ -668,7 +668,7 @@ sub print_backtrace\n\ if (++count % 5 == 0) fprintf(ofp, "\n\t "); - if (f->flags & FIELD_IS_FLAG) { + if (f->flags & TEP_FIELD_IS_FLAG) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t "); count = 4; @@ -678,7 +678,7 @@ sub print_backtrace\n\ event->name); fprintf(ofp, "\"%s\", $%s)", f->name, f->name); - } else if (f->flags & FIELD_IS_SYMBOLIC) { + } else if (f->flags & TEP_FIELD_IS_SYMBOLIC) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t "); count = 4; diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7f8afacd08ee..69aa93d4ee99 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -48,6 +48,7 @@ #include "cpumap.h" #include "print_binary.h" #include "stat.h" +#include "mem-events.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ @@ -192,7 +193,7 @@ static void try_call_object(const char *handler_name, PyObject *args) call_object(handler, args, handler_name); } -static void define_value(enum print_arg_type field_type, +static void define_value(enum tep_print_arg_type field_type, const char *ev_name, const char *field_name, const char *field_value, @@ -203,7 +204,7 @@ static void define_value(enum print_arg_type field_type, unsigned long long value; unsigned n = 0; - if (field_type == PRINT_SYMBOL) + if (field_type == TEP_PRINT_SYMBOL) handler_name = "define_symbolic_value"; t = PyTuple_New(4); @@ -222,8 +223,8 @@ static void define_value(enum print_arg_type field_type, Py_DECREF(t); } -static void define_values(enum print_arg_type field_type, - struct print_flag_sym *field, +static void define_values(enum tep_print_arg_type field_type, + struct tep_print_flag_sym *field, const char *ev_name, const char *field_name) { @@ -234,7 +235,7 @@ static void define_values(enum print_arg_type field_type, define_values(field_type, field->next, ev_name, field_name); } -static void define_field(enum print_arg_type field_type, +static void define_field(enum tep_print_arg_type field_type, const char *ev_name, const char *field_name, const char *delim) @@ -243,10 +244,10 @@ static void define_field(enum print_arg_type field_type, PyObject *t; unsigned n = 0; - if (field_type == PRINT_SYMBOL) + if (field_type == TEP_PRINT_SYMBOL) handler_name = "define_symbolic_field"; - if (field_type == PRINT_FLAGS) + if (field_type == TEP_PRINT_FLAGS) t = PyTuple_New(3); else t = PyTuple_New(2); @@ -255,7 +256,7 @@ static void define_field(enum print_arg_type field_type, PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name)); PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name)); - if (field_type == PRINT_FLAGS) + if (field_type == TEP_PRINT_FLAGS) PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim)); try_call_object(handler_name, t); @@ -263,54 +264,54 @@ static void define_field(enum print_arg_type field_type, Py_DECREF(t); } -static void define_event_symbols(struct event_format *event, +static void define_event_symbols(struct tep_event_format *event, const char *ev_name, - struct print_arg *args) + struct tep_print_arg *args) { if (args == NULL) return; switch (args->type) { - case PRINT_NULL: + case TEP_PRINT_NULL: break; - case PRINT_ATOM: - define_value(PRINT_FLAGS, ev_name, cur_field_name, "0", + case TEP_PRINT_ATOM: + define_value(TEP_PRINT_FLAGS, ev_name, cur_field_name, "0", args->atom.atom); zero_flag_atom = 0; break; - case PRINT_FIELD: + case TEP_PRINT_FIELD: free(cur_field_name); cur_field_name = strdup(args->field.name); break; - case PRINT_FLAGS: + case TEP_PRINT_FLAGS: define_event_symbols(event, ev_name, args->flags.field); - define_field(PRINT_FLAGS, ev_name, cur_field_name, + define_field(TEP_PRINT_FLAGS, ev_name, cur_field_name, args->flags.delim); - define_values(PRINT_FLAGS, args->flags.flags, ev_name, + define_values(TEP_PRINT_FLAGS, args->flags.flags, ev_name, cur_field_name); break; - case PRINT_SYMBOL: + case TEP_PRINT_SYMBOL: define_event_symbols(event, ev_name, args->symbol.field); - define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL); - define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name, + define_field(TEP_PRINT_SYMBOL, ev_name, cur_field_name, NULL); + define_values(TEP_PRINT_SYMBOL, args->symbol.symbols, ev_name, cur_field_name); break; - case PRINT_HEX: - case PRINT_HEX_STR: + case TEP_PRINT_HEX: + case TEP_PRINT_HEX_STR: define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; - case PRINT_INT_ARRAY: + case TEP_PRINT_INT_ARRAY: define_event_symbols(event, ev_name, args->int_array.field); define_event_symbols(event, ev_name, args->int_array.count); define_event_symbols(event, ev_name, args->int_array.el_size); break; - case PRINT_STRING: + case TEP_PRINT_STRING: break; - case PRINT_TYPE: + case TEP_PRINT_TYPE: define_event_symbols(event, ev_name, args->typecast.item); break; - case PRINT_OP: + case TEP_PRINT_OP: if (strcmp(args->op.op, ":") == 0) zero_flag_atom = 1; define_event_symbols(event, ev_name, args->op.left); @@ -318,11 +319,11 @@ static void define_event_symbols(struct event_format *event, break; default: /* gcc warns for these? */ - case PRINT_BSTRING: - case PRINT_DYNAMIC_ARRAY: - case PRINT_DYNAMIC_ARRAY_LEN: - case PRINT_FUNC: - case PRINT_BITMASK: + case TEP_PRINT_BSTRING: + case TEP_PRINT_DYNAMIC_ARRAY: + case TEP_PRINT_DYNAMIC_ARRAY_LEN: + case TEP_PRINT_FUNC: + case TEP_PRINT_BITMASK: /* we should warn... */ return; } @@ -331,10 +332,10 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->next); } -static PyObject *get_field_numeric_entry(struct event_format *event, - struct format_field *field, void *data) +static PyObject *get_field_numeric_entry(struct tep_event_format *event, + struct tep_format_field *field, void *data) { - bool is_array = field->flags & FIELD_IS_ARRAY; + bool is_array = field->flags & TEP_FIELD_IS_ARRAY; PyObject *obj = NULL, *list = NULL; unsigned long long val; unsigned int item_size, n_items, i; @@ -352,7 +353,7 @@ static PyObject *get_field_numeric_entry(struct event_format *event, val = read_size(event, data + field->offset + i * item_size, item_size); - if (field->flags & FIELD_IS_SIGNED) { + if (field->flags & TEP_FIELD_IS_SIGNED) { if ((long long)val >= LONG_MIN && (long long)val <= LONG_MAX) obj = _PyLong_FromLong(val); @@ -372,6 +373,19 @@ static PyObject *get_field_numeric_entry(struct event_format *event, return obj; } +static const char *get_dsoname(struct map *map) +{ + const char *dsoname = "[unknown]"; + + if (map && map->dso) { + if (symbol_conf.show_kernel_path && map->dso->long_name) + dsoname = map->dso->long_name; + else + dsoname = map->dso->name; + } + + return dsoname; +} static PyObject *python_process_callchain(struct perf_sample *sample, struct perf_evsel *evsel, @@ -427,14 +441,8 @@ static PyObject *python_process_callchain(struct perf_sample *sample, } if (node->map) { - struct map *map = node->map; - const char *dsoname = "[unknown]"; - if (map && map->dso) { - if (symbol_conf.show_kernel_path && map->dso->long_name) - dsoname = map->dso->long_name; - else - dsoname = map->dso->name; - } + const char *dsoname = get_dsoname(node->map); + pydict_set_item_string_decref(pyelem, "dso", _PyUnicode_FromString(dsoname)); } @@ -448,6 +456,166 @@ exit: return pylist; } +static PyObject *python_process_brstack(struct perf_sample *sample, + struct thread *thread) +{ + struct branch_stack *br = sample->branch_stack; + PyObject *pylist; + u64 i; + + pylist = PyList_New(0); + if (!pylist) + Py_FatalError("couldn't create Python list"); + + if (!(br && br->nr)) + goto exit; + + for (i = 0; i < br->nr; i++) { + PyObject *pyelem; + struct addr_location al; + const char *dsoname; + + pyelem = PyDict_New(); + if (!pyelem) + Py_FatalError("couldn't create Python dictionary"); + + pydict_set_item_string_decref(pyelem, "from", + PyLong_FromUnsignedLongLong(br->entries[i].from)); + pydict_set_item_string_decref(pyelem, "to", + PyLong_FromUnsignedLongLong(br->entries[i].to)); + pydict_set_item_string_decref(pyelem, "mispred", + PyBool_FromLong(br->entries[i].flags.mispred)); + pydict_set_item_string_decref(pyelem, "predicted", + PyBool_FromLong(br->entries[i].flags.predicted)); + pydict_set_item_string_decref(pyelem, "in_tx", + PyBool_FromLong(br->entries[i].flags.in_tx)); + pydict_set_item_string_decref(pyelem, "abort", + PyBool_FromLong(br->entries[i].flags.abort)); + pydict_set_item_string_decref(pyelem, "cycles", + PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles)); + + thread__find_map(thread, sample->cpumode, + br->entries[i].from, &al); + dsoname = get_dsoname(al.map); + pydict_set_item_string_decref(pyelem, "from_dsoname", + _PyUnicode_FromString(dsoname)); + + thread__find_map(thread, sample->cpumode, + br->entries[i].to, &al); + dsoname = get_dsoname(al.map); + pydict_set_item_string_decref(pyelem, "to_dsoname", + _PyUnicode_FromString(dsoname)); + + PyList_Append(pylist, pyelem); + Py_DECREF(pyelem); + } + +exit: + return pylist; +} + +static unsigned long get_offset(struct symbol *sym, struct addr_location *al) +{ + unsigned long offset; + + if (al->addr < sym->end) + offset = al->addr - sym->start; + else + offset = al->addr - al->map->start - sym->start; + + return offset; +} + +static int get_symoff(struct symbol *sym, struct addr_location *al, + bool print_off, char *bf, int size) +{ + unsigned long offset; + + if (!sym || !sym->name[0]) + return scnprintf(bf, size, "%s", "[unknown]"); + + if (!print_off) + return scnprintf(bf, size, "%s", sym->name); + + offset = get_offset(sym, al); + + return scnprintf(bf, size, "%s+0x%x", sym->name, offset); +} + +static int get_br_mspred(struct branch_flags *flags, char *bf, int size) +{ + if (!flags->mispred && !flags->predicted) + return scnprintf(bf, size, "%s", "-"); + + if (flags->mispred) + return scnprintf(bf, size, "%s", "M"); + + return scnprintf(bf, size, "%s", "P"); +} + +static PyObject *python_process_brstacksym(struct perf_sample *sample, + struct thread *thread) +{ + struct branch_stack *br = sample->branch_stack; + PyObject *pylist; + u64 i; + char bf[512]; + struct addr_location al; + + pylist = PyList_New(0); + if (!pylist) + Py_FatalError("couldn't create Python list"); + + if (!(br && br->nr)) + goto exit; + + for (i = 0; i < br->nr; i++) { + PyObject *pyelem; + + pyelem = PyDict_New(); + if (!pyelem) + Py_FatalError("couldn't create Python dictionary"); + + thread__find_symbol(thread, sample->cpumode, + br->entries[i].from, &al); + get_symoff(al.sym, &al, true, bf, sizeof(bf)); + pydict_set_item_string_decref(pyelem, "from", + _PyUnicode_FromString(bf)); + + thread__find_symbol(thread, sample->cpumode, + br->entries[i].to, &al); + get_symoff(al.sym, &al, true, bf, sizeof(bf)); + pydict_set_item_string_decref(pyelem, "to", + _PyUnicode_FromString(bf)); + + get_br_mspred(&br->entries[i].flags, bf, sizeof(bf)); + pydict_set_item_string_decref(pyelem, "pred", + _PyUnicode_FromString(bf)); + + if (br->entries[i].flags.in_tx) { + pydict_set_item_string_decref(pyelem, "in_tx", + _PyUnicode_FromString("X")); + } else { + pydict_set_item_string_decref(pyelem, "in_tx", + _PyUnicode_FromString("-")); + } + + if (br->entries[i].flags.abort) { + pydict_set_item_string_decref(pyelem, "abort", + _PyUnicode_FromString("A")); + } else { + pydict_set_item_string_decref(pyelem, "abort", + _PyUnicode_FromString("-")); + } + + PyList_Append(pylist, pyelem); + Py_DECREF(pyelem); + } + +exit: + return pylist; +} + static PyObject *get_sample_value_as_tuple(struct sample_read_value *value) { PyObject *t; @@ -498,12 +666,63 @@ static void set_sample_read_in_dict(PyObject *dict_sample, pydict_set_item_string_decref(dict_sample, "values", values); } +static void set_sample_datasrc_in_dict(PyObject *dict, + struct perf_sample *sample) +{ + struct mem_info mi = { .data_src.val = sample->data_src }; + char decode[100]; + + pydict_set_item_string_decref(dict, "datasrc", + PyLong_FromUnsignedLongLong(sample->data_src)); + + perf_script__meminfo_scnprintf(decode, 100, &mi); + + pydict_set_item_string_decref(dict, "datasrc_decode", + _PyUnicode_FromString(decode)); +} + +static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size) +{ + unsigned int i = 0, r; + int printed = 0; + + bf[0] = 0; + + for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { + u64 val = regs->regs[i++]; + + printed += scnprintf(bf + printed, size - printed, + "%5s:0x%" PRIx64 " ", + perf_reg_name(r), val); + } + + return printed; +} + +static void set_regs_in_dict(PyObject *dict, + struct perf_sample *sample, + struct perf_evsel *evsel) +{ + struct perf_event_attr *attr = &evsel->attr; + char bf[512]; + + regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf)); + + pydict_set_item_string_decref(dict, "iregs", + _PyUnicode_FromString(bf)); + + regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf)); + + pydict_set_item_string_decref(dict, "uregs", + _PyUnicode_FromString(bf)); +} + static PyObject *get_perf_sample_dict(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, PyObject *callchain) { - PyObject *dict, *dict_sample; + PyObject *dict, *dict_sample, *brstack, *brstacksym; dict = PyDict_New(); if (!dict) @@ -534,6 +753,11 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict_sample, "addr", PyLong_FromUnsignedLongLong(sample->addr)); set_sample_read_in_dict(dict_sample, sample, evsel); + pydict_set_item_string_decref(dict_sample, "weight", + PyLong_FromUnsignedLongLong(sample->weight)); + pydict_set_item_string_decref(dict_sample, "transaction", + PyLong_FromUnsignedLongLong(sample->transaction)); + set_sample_datasrc_in_dict(dict_sample, sample); pydict_set_item_string_decref(dict, "sample", dict_sample); pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize( @@ -551,6 +775,14 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "callchain", callchain); + brstack = python_process_brstack(sample, al->thread); + pydict_set_item_string_decref(dict, "brstack", brstack); + + brstacksym = python_process_brstacksym(sample, al->thread); + pydict_set_item_string_decref(dict, "brstacksym", brstacksym); + + set_regs_in_dict(dict, sample, evsel); + return dict; } @@ -558,11 +790,11 @@ static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al) { - struct event_format *event = evsel->tp_format; + struct tep_event_format *event = evsel->tp_format; PyObject *handler, *context, *t, *obj = NULL, *callchain; PyObject *dict = NULL, *all_entries_dict = NULL; static char handler_name[256]; - struct format_field *field; + struct tep_format_field *field; unsigned long s, ns; unsigned n = 0; int pid; @@ -635,22 +867,22 @@ static void python_process_tracepoint(struct perf_sample *sample, unsigned int offset, len; unsigned long long val; - if (field->flags & FIELD_IS_ARRAY) { + if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; len = field->size; - if (field->flags & FIELD_IS_DYNAMIC) { - val = pevent_read_number(scripting_context->pevent, - data + offset, len); + if (field->flags & TEP_FIELD_IS_DYNAMIC) { + val = tep_read_number(scripting_context->pevent, + data + offset, len); offset = val; len = offset >> 16; offset &= 0xffff; } - if (field->flags & FIELD_IS_STRING && + if (field->flags & TEP_FIELD_IS_STRING && is_printable_array(data + offset, len)) { obj = _PyUnicode_FromString((char *) data + offset); } else { obj = PyByteArray_FromStringAndSize((const char *) data + offset, len); - field->flags &= ~FIELD_IS_STRING; + field->flags &= ~TEP_FIELD_IS_STRING; } } else { /* FIELD_IS_NUMERIC */ obj = get_field_numeric_entry(event, field, data); @@ -676,14 +908,11 @@ static void python_process_tracepoint(struct perf_sample *sample, if (_PyTuple_Resize(&t, n) == -1) Py_FatalError("error resizing Python tuple"); - if (!dict) { + if (!dict) call_object(handler, t, handler_name); - } else { + else call_object(handler, t, default_handler_name); - Py_DECREF(dict); - } - Py_XDECREF(all_entries_dict); Py_DECREF(t); } @@ -1003,7 +1232,6 @@ static void python_process_general_event(struct perf_sample *sample, call_object(handler, t, handler_name); - Py_DECREF(dict); Py_DECREF(t); } @@ -1360,10 +1588,10 @@ static int python_stop_script(void) return 0; } -static int python_generate_script(struct pevent *pevent, const char *outfile) +static int python_generate_script(struct tep_handle *pevent, const char *outfile) { - struct event_format *event = NULL; - struct format_field *f; + struct tep_event_format *event = NULL; + struct tep_format_field *f; char fname[PATH_MAX]; int not_first, count; FILE *ofp; @@ -1395,6 +1623,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "# See the perf-script-python Documentation for the list " "of available functions.\n\n"); + fprintf(ofp, "from __future__ import print_function\n\n"); fprintf(ofp, "import os\n"); fprintf(ofp, "import sys\n\n"); @@ -1404,10 +1633,10 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "from Core import *\n\n\n"); fprintf(ofp, "def trace_begin():\n"); - fprintf(ofp, "\tprint \"in trace_begin\"\n\n"); + fprintf(ofp, "\tprint(\"in trace_begin\")\n\n"); fprintf(ofp, "def trace_end():\n"); - fprintf(ofp, "\tprint \"in trace_end\"\n\n"); + fprintf(ofp, "\tprint(\"in trace_end\")\n\n"); while ((event = trace_find_next_event(pevent, event))) { fprintf(ofp, "def %s__%s(", event->system, event->name); @@ -1443,7 +1672,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) "common_secs, common_nsecs,\n\t\t\t" "common_pid, common_comm)\n\n"); - fprintf(ofp, "\t\tprint \""); + fprintf(ofp, "\t\tprint(\""); not_first = 0; count = 0; @@ -1457,12 +1686,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) count++; fprintf(ofp, "%s=", f->name); - if (f->flags & FIELD_IS_STRING || - f->flags & FIELD_IS_FLAG || - f->flags & FIELD_IS_ARRAY || - f->flags & FIELD_IS_SYMBOLIC) + if (f->flags & TEP_FIELD_IS_STRING || + f->flags & TEP_FIELD_IS_FLAG || + f->flags & TEP_FIELD_IS_ARRAY || + f->flags & TEP_FIELD_IS_SYMBOLIC) fprintf(ofp, "%%s"); - else if (f->flags & FIELD_IS_SIGNED) + else if (f->flags & TEP_FIELD_IS_SIGNED) fprintf(ofp, "%%d"); else fprintf(ofp, "%%u"); @@ -1480,7 +1709,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) if (++count % 5 == 0) fprintf(ofp, "\n\t\t"); - if (f->flags & FIELD_IS_FLAG) { + if (f->flags & TEP_FIELD_IS_FLAG) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t\t"); count = 4; @@ -1490,7 +1719,7 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) event->name); fprintf(ofp, "\"%s\", %s)", f->name, f->name); - } else if (f->flags & FIELD_IS_SYMBOLIC) { + } else if (f->flags & TEP_FIELD_IS_SYMBOLIC) { if ((count - 1) % 5 != 0) { fprintf(ofp, "\n\t\t"); count = 4; @@ -1504,31 +1733,31 @@ static int python_generate_script(struct pevent *pevent, const char *outfile) fprintf(ofp, "%s", f->name); } - fprintf(ofp, ")\n\n"); + fprintf(ofp, "))\n\n"); - fprintf(ofp, "\t\tprint 'Sample: {'+" - "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tprint('Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n"); fprintf(ofp, "\t\tfor node in common_callchain:"); fprintf(ofp, "\n\t\t\tif 'sym' in node:"); - fprintf(ofp, "\n\t\t\t\tprint \"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name'])"); + fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))"); fprintf(ofp, "\n\t\t\telse:"); - fprintf(ofp, "\n\t\t\t\tprint \"\t[%%x]\" %% (node['ip'])\n\n"); - fprintf(ofp, "\t\tprint \"\\n\"\n\n"); + fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n"); + fprintf(ofp, "\t\tprint()\n\n"); } fprintf(ofp, "def trace_unhandled(event_name, context, " "event_fields_dict, perf_sample_dict):\n"); - fprintf(ofp, "\t\tprint get_dict_as_string(event_fields_dict)\n"); - fprintf(ofp, "\t\tprint 'Sample: {'+" - "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}'\n\n"); + fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n"); + fprintf(ofp, "\t\tprint('Sample: {'+" + "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n"); fprintf(ofp, "def print_header(" "event_name, cpu, secs, nsecs, pid, comm):\n" - "\tprint \"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" - "(event_name, cpu, secs, nsecs, pid, comm),\n\n"); + "\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t" + "(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n"); fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n" "\treturn delimiter.join" diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f4a7a437ee87..7d2c8ce6cfad 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -199,12 +199,10 @@ void perf_session__delete(struct perf_session *session) free(session); } -static int process_event_synth_tracing_data_stub(struct perf_tool *tool +static int process_event_synth_tracing_data_stub(struct perf_session *session __maybe_unused, union perf_event *event - __maybe_unused, - struct perf_session *session - __maybe_unused) + __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -277,10 +275,8 @@ static int skipn(int fd, off_t n) return 0; } -static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session - __maybe_unused) +static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused, + union perf_event *event) { dump_printf(": unhandled!\n"); if (perf_data__is_pipe(session->data)) @@ -288,9 +284,8 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, return event->auxtrace.size; } -static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +static int process_event_op2_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -298,9 +293,8 @@ static int process_event_op2_stub(struct perf_tool *tool __maybe_unused, static -int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_thread_map_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { if (dump_trace) perf_event__fprintf_thread_map(event, stdout); @@ -310,9 +304,8 @@ int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused, } static -int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_cpu_map_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { if (dump_trace) perf_event__fprintf_cpu_map(event, stdout); @@ -322,9 +315,8 @@ int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused, } static -int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *session __maybe_unused) +int process_event_stat_config_stub(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { if (dump_trace) perf_event__fprintf_stat_config(event, stdout); @@ -333,10 +325,8 @@ int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_stat_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) +static int process_stat_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) { if (dump_trace) perf_event__fprintf_stat(event, stdout); @@ -345,10 +335,8 @@ static int process_stat_stub(struct perf_tool *tool __maybe_unused, return 0; } -static int process_stat_round_stub(struct perf_tool *tool __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) +static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused, + union perf_event *event) { if (dump_trace) perf_event__fprintf_stat_round(event, stdout); @@ -1094,7 +1082,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; - if (sample_type & PERF_SAMPLE_CALLCHAIN) + if (evsel__has_callchain(evsel)) callchain__printf(evsel, sample); if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel)) @@ -1374,37 +1362,37 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_HEADER_TRACING_DATA: /* setup for reading amidst mmap */ lseek(fd, file_offset, SEEK_SET); - return tool->tracing_data(tool, event, session); + return tool->tracing_data(session, event); case PERF_RECORD_HEADER_BUILD_ID: - return tool->build_id(tool, event, session); + return tool->build_id(session, event); case PERF_RECORD_FINISHED_ROUND: return tool->finished_round(tool, event, oe); case PERF_RECORD_ID_INDEX: - return tool->id_index(tool, event, session); + return tool->id_index(session, event); case PERF_RECORD_AUXTRACE_INFO: - return tool->auxtrace_info(tool, event, session); + return tool->auxtrace_info(session, event); case PERF_RECORD_AUXTRACE: /* setup for reading amidst mmap */ lseek(fd, file_offset + event->header.size, SEEK_SET); - return tool->auxtrace(tool, event, session); + return tool->auxtrace(session, event); case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); - return tool->auxtrace_error(tool, event, session); + return tool->auxtrace_error(session, event); case PERF_RECORD_THREAD_MAP: - return tool->thread_map(tool, event, session); + return tool->thread_map(session, event); case PERF_RECORD_CPU_MAP: - return tool->cpu_map(tool, event, session); + return tool->cpu_map(session, event); case PERF_RECORD_STAT_CONFIG: - return tool->stat_config(tool, event, session); + return tool->stat_config(session, event); case PERF_RECORD_STAT: - return tool->stat(tool, event, session); + return tool->stat(session, event); case PERF_RECORD_STAT_ROUND: - return tool->stat_round(tool, event, session); + return tool->stat_round(session, event); case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; - return tool->time_conv(tool, event, session); + return tool->time_conv(session, event); case PERF_RECORD_HEADER_FEATURE: - return tool->feature(tool, event, session); + return tool->feature(session, event); default: return -EINVAL; } @@ -1973,12 +1961,11 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg) return false; } -int maps__set_kallsyms_ref_reloc_sym(struct map **maps, - const char *symbol_name, u64 addr) +int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr) { char *bracket; - int i; struct ref_reloc_sym *ref; + struct kmap *kmap; ref = zalloc(sizeof(struct ref_reloc_sym)); if (ref == NULL) @@ -1996,13 +1983,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, ref->addr = addr; - for (i = 0; i < MAP__NR_TYPES; ++i) { - struct kmap *kmap = map__kmap(maps[i]); - - if (!kmap) - continue; + kmap = map__kmap(map); + if (kmap) kmap->ref_reloc_sym = ref; - } return 0; } @@ -2138,9 +2121,8 @@ out: return err; } -int perf_event__process_id_index(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_id_index(struct perf_session *session, + union perf_event *event) { struct perf_evlist *evlist = session->evlist; struct id_index_event *ie = &event->id_index; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index da40b4b380ca..d96eccd7d27f 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -120,9 +120,8 @@ int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample); -int perf_event__process_id_index(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_id_index(struct perf_session *session, + union perf_event *event); int perf_event__synthesize_id_index(struct perf_tool *tool, perf_event__handler_t process, diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 001be4f9d3b9..63f758c655d5 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -1,12 +1,22 @@ #!/usr/bin/python from os import getenv +from subprocess import Popen, PIPE +from re import sub + +def clang_has_option(option): + return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] cc = getenv("CC") if cc == "clang": - from _sysconfigdata import build_time_vars - from re import sub - build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"]) + from distutils.sysconfig import get_config_vars + vars = get_config_vars() + for var in ('CFLAGS', 'OPT'): + vars[var] = sub("-specs=[^ ]+", "", vars[var]) + if not clang_has_option("-mcet"): + vars[var] = sub("-mcet", "", vars[var]) + if not clang_has_option("-fcf-protection"): + vars[var] = sub("-fcf-protection", "", vars[var]) from distutils.core import setup, Extension @@ -27,7 +37,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) -cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' ] +cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] if cc != "clang": cflags += ['-Wno-cast-function-type' ] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 26a68dfd8a4f..f96c005b3c41 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2,7 +2,7 @@ #include <errno.h> #include <inttypes.h> #include <regex.h> -#include <sys/mman.h> +#include <linux/mman.h> #include "sort.h" #include "hist.h" #include "comm.h" @@ -282,7 +282,7 @@ static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym, ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level); if (sym && map) { - if (map->type == MAP__VARIABLE) { + if (sym->type == STT_OBJECT) { ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name); ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx", ip - map->unmap_ip(map, sym->start)); @@ -331,24 +331,18 @@ struct sort_entry sort_sym = { /* --sort srcline */ -char *hist_entry__get_srcline(struct hist_entry *he) +char *hist_entry__srcline(struct hist_entry *he) { - struct map *map = he->ms.map; - - if (!map) - return SRCLINE_UNKNOWN; - - return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true, he->ip); + return map__srcline(he->ms.map, he->ip, he->ms.sym); } static int64_t sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right) { if (!left->srcline) - left->srcline = hist_entry__get_srcline(left); + left->srcline = hist_entry__srcline(left); if (!right->srcline) - right->srcline = hist_entry__get_srcline(right); + right->srcline = hist_entry__srcline(right); return strcmp(right->srcline, left->srcline); } @@ -357,7 +351,7 @@ static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { if (!he->srcline) - he->srcline = hist_entry__get_srcline(he); + he->srcline = hist_entry__srcline(he); return repsep_snprintf(bf, size, "%-.*s", width, he->srcline); } @@ -371,33 +365,20 @@ struct sort_entry sort_srcline = { /* --sort srcline_from */ +static char *addr_map_symbol__srcline(struct addr_map_symbol *ams) +{ + return map__srcline(ams->map, ams->al_addr, ams->sym); +} + static int64_t sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->branch_info->srcline_from) { - struct map *map = left->branch_info->from.map; - if (!map) - left->branch_info->srcline_from = SRCLINE_UNKNOWN; - else - left->branch_info->srcline_from = get_srcline(map->dso, - map__rip_2objdump(map, - left->branch_info->from.al_addr), - left->branch_info->from.sym, - true, true, - left->branch_info->from.al_addr); - } - if (!right->branch_info->srcline_from) { - struct map *map = right->branch_info->from.map; - if (!map) - right->branch_info->srcline_from = SRCLINE_UNKNOWN; - else - right->branch_info->srcline_from = get_srcline(map->dso, - map__rip_2objdump(map, - right->branch_info->from.al_addr), - right->branch_info->from.sym, - true, true, - right->branch_info->from.al_addr); - } + if (!left->branch_info->srcline_from) + left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from); + + if (!right->branch_info->srcline_from) + right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from); + return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -419,30 +400,12 @@ struct sort_entry sort_srcline_from = { static int64_t sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) { - if (!left->branch_info->srcline_to) { - struct map *map = left->branch_info->to.map; - if (!map) - left->branch_info->srcline_to = SRCLINE_UNKNOWN; - else - left->branch_info->srcline_to = get_srcline(map->dso, - map__rip_2objdump(map, - left->branch_info->to.al_addr), - left->branch_info->from.sym, - true, true, - left->branch_info->to.al_addr); - } - if (!right->branch_info->srcline_to) { - struct map *map = right->branch_info->to.map; - if (!map) - right->branch_info->srcline_to = SRCLINE_UNKNOWN; - else - right->branch_info->srcline_to = get_srcline(map->dso, - map__rip_2objdump(map, - right->branch_info->to.al_addr), - right->branch_info->to.sym, - true, true, - right->branch_info->to.al_addr); - } + if (!left->branch_info->srcline_to) + left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to); + + if (!right->branch_info->srcline_to) + right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to); + return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -638,7 +601,7 @@ static char *get_trace_output(struct hist_entry *he) { struct trace_seq seq; struct perf_evsel *evsel; - struct pevent_record rec = { + struct tep_record rec = { .data = he->raw_data, .size = he->raw_size, }; @@ -647,10 +610,10 @@ static char *get_trace_output(struct hist_entry *he) trace_seq_init(&seq); if (symbol_conf.raw_trace) { - pevent_print_fields(&seq, he->raw_data, he->raw_size, - evsel->tp_format); + tep_print_fields(&seq, he->raw_data, he->raw_size, + evsel->tp_format); } else { - pevent_event_info(&seq, evsel->tp_format, &rec); + tep_event_info(&seq, evsel->tp_format, &rec); } /* * Trim the buffer, it starts at 4KB and we're not going to @@ -1211,7 +1174,7 @@ static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf, /* print [s] for shared data mmaps */ if ((he->cpumode != PERF_RECORD_MISC_KERNEL) && - map && (map->type == MAP__VARIABLE) && + map && !(map->prot & PROT_EXEC) && (map->flags & MAP_SHARED) && (map->maj || map->min || map->ino || map->ino_generation)) @@ -1921,7 +1884,7 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, struct hpp_dynamic_entry { struct perf_hpp_fmt hpp; struct perf_evsel *evsel; - struct format_field *field; + struct tep_format_field *field; unsigned dynamic_len; bool raw_trace; }; @@ -1936,7 +1899,7 @@ static int hde_width(struct hpp_dynamic_entry *hde) if (namelen > len) len = namelen; - if (!(hde->field->flags & FIELD_IS_STRING)) { + if (!(hde->field->flags & TEP_FIELD_IS_STRING)) { /* length for print hex numbers */ fieldlen = hde->field->size * 2 + 2; } @@ -1952,7 +1915,7 @@ static void update_dynamic_len(struct hpp_dynamic_entry *hde, struct hist_entry *he) { char *str, *pos; - struct format_field *field = hde->field; + struct tep_format_field *field = hde->field; size_t namelen; bool last = false; @@ -2037,7 +2000,7 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hpp_dynamic_entry *hde; size_t len = fmt->user_len; char *str, *pos; - struct format_field *field; + struct tep_format_field *field; size_t namelen; bool last = false; int ret; @@ -2084,7 +2047,7 @@ static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct trace_seq seq; raw_field: trace_seq_init(&seq); - pevent_print_field(&seq, he->raw_data, hde->field); + tep_print_field(&seq, he->raw_data, hde->field); str = seq.buffer; } @@ -2097,7 +2060,7 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, struct hist_entry *a, struct hist_entry *b) { struct hpp_dynamic_entry *hde; - struct format_field *field; + struct tep_format_field *field; unsigned offset, size; hde = container_of(fmt, struct hpp_dynamic_entry, hpp); @@ -2108,10 +2071,10 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt, } field = hde->field; - if (field->flags & FIELD_IS_DYNAMIC) { + if (field->flags & TEP_FIELD_IS_DYNAMIC) { unsigned long long dyn; - pevent_read_number_field(field, a->raw_data, &dyn); + tep_read_number_field(field, a->raw_data, &dyn); offset = dyn & 0xffff; size = (dyn >> 16) & 0xffff; @@ -2154,7 +2117,7 @@ static void hde_free(struct perf_hpp_fmt *fmt) } static struct hpp_dynamic_entry * -__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field, +__alloc_dynamic_entry(struct perf_evsel *evsel, struct tep_format_field *field, int level) { struct hpp_dynamic_entry *hde; @@ -2289,7 +2252,7 @@ static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_nam } static int __dynamic_dimension__add(struct perf_evsel *evsel, - struct format_field *field, + struct tep_format_field *field, bool raw_trace, int level) { struct hpp_dynamic_entry *hde; @@ -2307,7 +2270,7 @@ static int __dynamic_dimension__add(struct perf_evsel *evsel, static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level) { int ret; - struct format_field *field; + struct tep_format_field *field; field = evsel->tp_format->format.fields; while (field) { @@ -2342,13 +2305,13 @@ static int add_all_matching_fields(struct perf_evlist *evlist, { int ret = -ESRCH; struct perf_evsel *evsel; - struct format_field *field; + struct tep_format_field *field; evlist__for_each_entry(evlist, evsel) { if (evsel->attr.type != PERF_TYPE_TRACEPOINT) continue; - field = pevent_find_any_field(evsel->tp_format, field_name); + field = tep_find_any_field(evsel->tp_format, field_name); if (field == NULL) continue; @@ -2364,7 +2327,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok, { char *str, *event_name, *field_name, *opt_name; struct perf_evsel *evsel; - struct format_field *field; + struct tep_format_field *field; bool raw_trace = symbol_conf.raw_trace; int ret = 0; @@ -2415,7 +2378,7 @@ static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok, if (!strcmp(field_name, "*")) { ret = add_evsel_fields(evsel, raw_trace, level); } else { - field = pevent_find_any_field(evsel->tp_format, field_name); + field = tep_find_any_field(evsel->tp_format, field_name); if (field == NULL) { pr_debug("Cannot find event field for %s.%s\n", event_name, field_name); @@ -2582,7 +2545,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; - if (sd->entry == &sort_mem_dcacheline && cacheline_size == 0) + if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0) return -EINVAL; if (sd->entry == &sort_mem_daddr_sym) @@ -2628,7 +2591,7 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str, if (*tok) { ret = sort_dimension__add(list, tok, evlist, level); if (ret == -EINVAL) { - if (!cacheline_size && !strncasecmp(tok, "dcacheline", strlen(tok))) + if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok))) pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system"); else pr_err("Invalid --sort key: `%s'", tok); diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 035b62e2c60b..a97cf8e6be86 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -112,6 +112,8 @@ struct hist_entry { char level; u8 filtered; + + u16 callchain_size; union { /* * Since perf diff only supports the stdio output, TUI @@ -151,6 +153,11 @@ struct hist_entry { struct callchain_root callchain[0]; /* must be last member */ }; +static __pure inline bool hist_entry__has_callchains(struct hist_entry *he) +{ + return he->callchain_size != 0; +} + static inline bool hist_entry__has_pairs(struct hist_entry *he) { return !list_empty(&he->pairs.node); @@ -186,13 +193,13 @@ static inline float hist_entry__get_percent_limit(struct hist_entry *he) static inline u64 cl_address(u64 address) { /* return the cacheline of the address */ - return (address & ~(cacheline_size - 1)); + return (address & ~(cacheline_size() - 1)); } static inline u64 cl_offset(u64 address) { /* return the cacheline of the address */ - return (address & (cacheline_size - 1)); + return (address & (cacheline_size() - 1)); } enum sort_mode { @@ -269,7 +276,7 @@ extern struct sort_entry sort_thread; extern struct list_head hist_entry__sort_list; struct perf_evlist; -struct pevent; +struct tep_handle; int setup_sorting(struct perf_evlist *evlist); int setup_output_field(void); void reset_output_field(void); @@ -292,5 +299,5 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); -char *hist_entry__get_srcline(struct hist_entry *he); +char *hist_entry__srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index 3c21fd059b64..e767c4a9d4d2 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -85,6 +85,9 @@ static struct symbol *new_inline_sym(struct dso *dso, struct symbol *inline_sym; char *demangled = NULL; + if (!funcname) + funcname = "??"; + if (dso) { demangled = dso__demangle_sym(dso, 0, funcname); if (demangled) @@ -103,6 +106,7 @@ static struct symbol *new_inline_sym(struct dso *dso, inline_sym = symbol__new(base_sym ? base_sym->start : 0, base_sym ? base_sym->end : 0, base_sym ? base_sym->binding : 0, + base_sym ? base_sym->type : 0, funcname); if (inline_sym) inline_sym->inlined = 1; diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c new file mode 100644 index 000000000000..e7b4c44ebb62 --- /dev/null +++ b/tools/perf/util/stat-display.c @@ -0,0 +1,1166 @@ +#include <stdio.h> +#include <inttypes.h> +#include <linux/time64.h> +#include <math.h> +#include "evlist.h" +#include "evsel.h" +#include "stat.h" +#include "top.h" +#include "thread_map.h" +#include "cpumap.h" +#include "string2.h" +#include "sane_ctype.h" +#include "cgroup.h" +#include <math.h> +#include <api/fs/fs.h> + +#define CNTR_NOT_SUPPORTED "<not supported>" +#define CNTR_NOT_COUNTED "<not counted>" + +static bool is_duration_time(struct perf_evsel *evsel) +{ + return !strcmp(evsel->name, "duration_time"); +} + +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena) +{ + if (config->csv_output) { + fprintf(config->output, "%s%" PRIu64 "%s%.2f", + config->csv_sep, + run, + config->csv_sep, + ena ? 100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); + } +} + +static void print_noise_pct(struct perf_stat_config *config, + double total, double avg) +{ + double pct = rel_stddev_stats(total, avg); + + if (config->csv_output) + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); + else if (pct) + fprintf(config->output, " ( +-%6.2f%% )", pct); +} + +static void print_noise(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg) +{ + struct perf_stat_evsel *ps; + + if (config->run_count == 1) + return; + + ps = evsel->stats; + print_noise_pct(config, stddev_stats(&ps->res_stats[0]), avg); +} + +static void aggr_printout(struct perf_stat_config *config, + struct perf_evsel *evsel, int id, int nr) +{ + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(config->output, "S%d-C%*d%s%*d%s", + cpu_map__id_to_socket(id), + config->csv_output ? 0 : -8, + cpu_map__id_to_cpu(id), + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; + case AGGR_SOCKET: + fprintf(config->output, "S%*d%s%*d%s", + config->csv_output ? 0 : -5, + id, + config->csv_sep, + config->csv_output ? 0 : 4, + nr, + config->csv_sep); + break; + case AGGR_NONE: + fprintf(config->output, "CPU%*d%s", + config->csv_output ? 0 : -4, + perf_evsel__cpus(evsel)->map[id], config->csv_sep); + break; + case AGGR_THREAD: + fprintf(config->output, "%*s-%*d%s", + config->csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + config->csv_output ? 0 : -8, + thread_map__pid(evsel->threads, id), + config->csv_sep); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + default: + break; + } +} + +struct outstate { + FILE *fh; + bool newline; + const char *prefix; + int nfields; + int id, nr; + struct perf_evsel *evsel; +}; + +#define METRIC_LEN 35 + +static void new_line_std(struct perf_stat_config *config __maybe_unused, + void *ctx) +{ + struct outstate *os = ctx; + + os->newline = true; +} + +static void do_new_line_std(struct perf_stat_config *config, + struct outstate *os) +{ + fputc('\n', os->fh); + fputs(os->prefix, os->fh); + aggr_printout(config, os->evsel, os->id, os->nr); + if (config->aggr_mode == AGGR_NONE) + fprintf(os->fh, " "); + fprintf(os->fh, " "); +} + +static void print_metric_std(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + int n; + bool newline = os->newline; + + os->newline = false; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%-*s", METRIC_LEN, ""); + return; + } + + if (newline) + do_new_line_std(config, os); + + n = fprintf(out, " # "); + if (color) + n += color_fprintf(out, color, fmt, val); + else + n += fprintf(out, fmt, val); + fprintf(out, " %-*s", METRIC_LEN - n - 1, unit); +} + +static void new_line_csv(struct perf_stat_config *config, void *ctx) +{ + struct outstate *os = ctx; + int i; + + fputc('\n', os->fh); + if (os->prefix) + fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); + aggr_printout(config, os->evsel, os->id, os->nr); + for (i = 0; i < os->nfields; i++) + fputs(config->csv_sep, os->fh); +} + +static void print_metric_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, + const char *color __maybe_unused, + const char *fmt, const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + + if (unit == NULL || fmt == NULL) { + fprintf(out, "%s%s", config->csv_sep, config->csv_sep); + return; + } + snprintf(buf, sizeof(buf), fmt, val); + ends = vals = ltrim(buf); + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + while (isspace(*unit)) + unit++; + fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit); +} + +/* Filter out some columns that don't work well in metrics only mode */ + +static bool valid_only_metric(const char *unit) +{ + if (!unit) + return false; + if (strstr(unit, "/sec") || + strstr(unit, "hz") || + strstr(unit, "Hz") || + strstr(unit, "CPUs utilized")) + return false; + return true; +} + +static const char *fixunit(char *buf, struct perf_evsel *evsel, + const char *unit) +{ + if (!strncmp(unit, "of all", 6)) { + snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel), + unit); + return buf; + } + return unit; +} + +static void print_metric_only(struct perf_stat_config *config, + void *ctx, const char *color, const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[1024], str[1024]; + unsigned mlen = config->metric_only_len; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(buf, os->evsel, unit); + if (mlen < strlen(unit)) + mlen = strlen(unit) + 1; + + if (color) + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; + + color_snprintf(str, sizeof(str), color ?: "", fmt, val); + fprintf(out, "%*s ", mlen, str); +} + +static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, + void *ctx, const char *color __maybe_unused, + const char *fmt, + const char *unit, double val) +{ + struct outstate *os = ctx; + FILE *out = os->fh; + char buf[64], *vals, *ends; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof buf, fmt, val); + ends = vals = ltrim(buf); + while (isdigit(*ends) || *ends == '.') + ends++; + *ends = 0; + fprintf(out, "%s%s", vals, config->csv_sep); +} + +static void new_line_metric(struct perf_stat_config *config __maybe_unused, + void *ctx __maybe_unused) +{ +} + +static void print_metric_header(struct perf_stat_config *config, + void *ctx, const char *color __maybe_unused, + const char *fmt __maybe_unused, + const char *unit, double val __maybe_unused) +{ + struct outstate *os = ctx; + char tbuf[1024]; + + if (!valid_only_metric(unit)) + return; + unit = fixunit(tbuf, os->evsel, unit); + if (config->csv_output) + fprintf(os->fh, "%s%s", unit, config->csv_sep); + else + fprintf(os->fh, "%*s ", config->metric_only_len, unit); +} + +static int first_shadow_cpu(struct perf_stat_config *config, + struct perf_evsel *evsel, int id) +{ + struct perf_evlist *evlist = evsel->evlist; + int i; + + if (!config->aggr_get_id) + return 0; + + if (config->aggr_mode == AGGR_NONE) + return id; + + if (config->aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (config->aggr_get_id(config, evlist->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + +static void abs_printout(struct perf_stat_config *config, + int id, int nr, struct perf_evsel *evsel, double avg) +{ + FILE *output = config->output; + double sc = evsel->scale; + const char *fmt; + + if (config->csv_output) { + fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + } else { + if (config->big_num) + fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; + else + fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; + } + + aggr_printout(config, evsel, id, nr); + + fprintf(output, fmt, avg, config->csv_sep); + + if (evsel->unit) + fprintf(output, "%-*s%s", + config->csv_output ? 0 : config->unit_width, + evsel->unit, config->csv_sep); + + fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel)); + + if (evsel->cgrp) + fprintf(output, "%s%s", config->csv_sep, evsel->cgrp->name); +} + +static bool is_mixed_hw_group(struct perf_evsel *counter) +{ + struct perf_evlist *evlist = counter->evlist; + u32 pmu_type = counter->attr.type; + struct perf_evsel *pos; + + if (counter->nr_members < 2) + return false; + + evlist__for_each_entry(evlist, pos) { + /* software events can be part of any hardware group */ + if (pos->attr.type == PERF_TYPE_SOFTWARE) + continue; + if (pmu_type == PERF_TYPE_SOFTWARE) { + pmu_type = pos->attr.type; + continue; + } + if (pmu_type != pos->attr.type) + return true; + } + + return false; +} + +static void printout(struct perf_stat_config *config, int id, int nr, + struct perf_evsel *counter, double uval, + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) +{ + struct perf_stat_output_ctx out; + struct outstate os = { + .fh = config->output, + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, + }; + print_metric_t pm = print_metric_std; + new_line_t nl; + + if (config->metric_only) { + nl = new_line_metric; + if (config->csv_output) + pm = print_metric_only_csv; + else + pm = print_metric_only; + } else + nl = new_line_std; + + if (config->csv_output && !config->metric_only) { + static int aggr_fields[] = { + [AGGR_GLOBAL] = 0, + [AGGR_THREAD] = 1, + [AGGR_NONE] = 1, + [AGGR_SOCKET] = 2, + [AGGR_CORE] = 2, + }; + + pm = print_metric_csv; + nl = new_line_csv; + os.nfields = 3; + os.nfields += aggr_fields[config->aggr_mode]; + if (counter->cgrp) + os.nfields++; + } + if (run == 0 || ena == 0 || counter->counts->scaled == -1) { + if (config->metric_only) { + pm(config, &os, NULL, "", "", 0); + return; + } + aggr_printout(config, counter, id, nr); + + fprintf(config->output, "%*s%s", + config->csv_output ? 0 : 18, + counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, + config->csv_sep); + + if (counter->supported) { + config->print_free_counters_hint = 1; + if (is_mixed_hw_group(counter)) + config->print_mixed_hw_group_error = 1; + } + + fprintf(config->output, "%-*s%s", + config->csv_output ? 0 : config->unit_width, + counter->unit, config->csv_sep); + + fprintf(config->output, "%*s", + config->csv_output ? 0 : -25, + perf_evsel__name(counter)); + + if (counter->cgrp) + fprintf(config->output, "%s%s", + config->csv_sep, counter->cgrp->name); + + if (!config->csv_output) + pm(config, &os, NULL, NULL, "", 0); + print_noise(config, counter, noise); + print_running(config, run, ena); + if (config->csv_output) + pm(config, &os, NULL, NULL, "", 0); + return; + } + + if (!config->metric_only) + abs_printout(config, id, nr, counter, uval); + + out.print_metric = pm; + out.new_line = nl; + out.ctx = &os; + out.force_header = false; + + if (config->csv_output && !config->metric_only) { + print_noise(config, counter, noise); + print_running(config, run, ena); + } + + perf_stat__print_shadow_stats(config, counter, uval, + first_shadow_cpu(config, counter, id), + &out, &config->metric_events, st); + if (!config->csv_output && !config->metric_only) { + print_noise(config, counter, noise); + print_running(config, run, ena); + } +} + +static void aggr_update_shadow(struct perf_stat_config *config, + struct perf_evlist *evlist) +{ + int cpu, s2, id, s; + u64 val; + struct perf_evsel *counter; + + for (s = 0; s < config->aggr_map->nr; s++) { + id = config->aggr_map->map[s]; + evlist__for_each_entry(evlist, counter) { + val = 0; + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + s2 = config->aggr_get_id(config, evlist->cpus, cpu); + if (s2 != id) + continue; + val += perf_counts(counter->counts, cpu, 0)->val; + } + perf_stat__update_shadow_stats(counter, val, + first_shadow_cpu(config, counter, id), + &rt_stat); + } + } +} + +static void uniquify_event_name(struct perf_evsel *counter) +{ + char *new_name; + char *config; + + if (counter->uniquified_name || + !counter->pmu_name || !strncmp(counter->name, counter->pmu_name, + strlen(counter->pmu_name))) + return; + + config = strchr(counter->name, '/'); + if (config) { + if (asprintf(&new_name, + "%s%s", counter->pmu_name, config) > 0) { + free(counter->name); + counter->name = new_name; + } + } else { + if (asprintf(&new_name, + "%s [%s]", counter->name, counter->pmu_name) > 0) { + free(counter->name); + counter->name = new_name; + } + } + + counter->uniquified_name = true; +} + +static void collect_all_aliases(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + struct perf_evlist *evlist = counter->evlist; + struct perf_evsel *alias; + + alias = list_prepare_entry(counter, &(evlist->entries), node); + list_for_each_entry_continue (alias, &evlist->entries, node) { + if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) || + alias->scale != counter->scale || + alias->cgrp != counter->cgrp || + strcmp(alias->unit, counter->unit) || + perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) + break; + alias->merged_stat = true; + cb(config, alias, data, false); + } +} + +static bool collect_data(struct perf_stat_config *config, struct perf_evsel *counter, + void (*cb)(struct perf_stat_config *config, struct perf_evsel *counter, void *data, + bool first), + void *data) +{ + if (counter->merged_stat) + return false; + cb(config, counter, data, true); + if (config->no_merge) + uniquify_event_name(counter); + else if (counter->auto_merge_stats) + collect_all_aliases(config, counter, cb, data); + return true; +} + +struct aggr_data { + u64 ena, run, val; + int id; + int nr; + int cpu; +}; + +static void aggr_cb(struct perf_stat_config *config, + struct perf_evsel *counter, void *data, bool first) +{ + struct aggr_data *ad = data; + int cpu, s2; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct perf_counts_values *counts; + + s2 = config->aggr_get_id(config, perf_evsel__cpus(counter), cpu); + if (s2 != ad->id) + continue; + if (first) + ad->nr++; + counts = perf_counts(counter->counts, cpu, 0); + /* + * When any result is bad, make them all to give + * consistent output in interval mode. + */ + if (counts->ena == 0 || counts->run == 0 || + counter->counts->scaled == -1) { + ad->ena = 0; + ad->run = 0; + break; + } + ad->val += counts->val; + ad->ena += counts->ena; + ad->run += counts->run; + } +} + +static void print_aggr(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + struct perf_evsel *counter; + int s, id, nr; + double uval; + u64 ena, run, val; + bool first; + + if (!(config->aggr_map || config->aggr_get_id)) + return; + + aggr_update_shadow(config, evlist); + + /* + * With metric_only everything is on a single line. + * Without each counter has its own line. + */ + for (s = 0; s < config->aggr_map->nr; s++) { + struct aggr_data ad; + if (prefix && metric_only) + fprintf(output, "%s", prefix); + + ad.id = id = config->aggr_map->map[s]; + first = true; + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + + ad.val = ad.ena = ad.run = 0; + ad.nr = 0; + if (!collect_data(config, counter, aggr_cb, &ad)) + continue; + nr = ad.nr; + ena = ad.ena; + run = ad.run; + val = ad.val; + if (first && metric_only) { + first = false; + aggr_printout(config, counter, id, nr); + } + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, id, nr, counter, uval, prefix, + run, ena, 1.0, &rt_stat); + if (!metric_only) + fputc('\n', output); + } + if (metric_only) + fputc('\n', output); + } +} + +static int cmp_val(const void *a, const void *b) +{ + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret, + struct target *_target) +{ + int cpu, thread, i = 0; + double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (uval == 0.0 && target__has_per_thread(_target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_stat_config *config, + struct target *_target, + struct perf_evsel *counter, char *prefix) +{ + FILE *output = config->output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads, _target); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { + if (prefix) + fprintf(output, "%s", prefix); + + id = buf[thread].id; + if (config->stats) + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &config->stats[id]); + else + printout(config, id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); + fputc('\n', output); + } + + free(buf); +} + +struct caggr_data { + double avg, avg_enabled, avg_running; +}; + +static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct caggr_data *cd = data; + struct perf_stat_evsel *ps = counter->stats; + + cd->avg += avg_stats(&ps->res_stats[0]); + cd->avg_enabled += avg_stats(&ps->res_stats[1]); + cd->avg_running += avg_stats(&ps->res_stats[2]); +} + +/* + * Print out the results of a single counter: + * aggregated counts in system-wide mode + */ +static void print_counter_aggr(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + bool metric_only = config->metric_only; + FILE *output = config->output; + double uval; + struct caggr_data cd = { .avg = 0.0 }; + + if (!collect_data(config, counter, counter_aggr_cb, &cd)) + return; + + if (prefix && !metric_only) + fprintf(output, "%s", prefix); + + uval = cd.avg * counter->scale; + printout(config, -1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); + if (!metric_only) + fprintf(output, "\n"); +} + +static void counter_cb(struct perf_stat_config *config __maybe_unused, + struct perf_evsel *counter, void *data, + bool first __maybe_unused) +{ + struct aggr_data *ad = data; + + ad->val += perf_counts(counter->counts, ad->cpu, 0)->val; + ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena; + ad->run += perf_counts(counter->counts, ad->cpu, 0)->run; +} + +/* + * Print out the results of a single counter: + * does not use aggregated count in system-wide + */ +static void print_counter(struct perf_stat_config *config, + struct perf_evsel *counter, char *prefix) +{ + FILE *output = config->output; + u64 ena, run, val; + double uval; + int cpu; + + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { + struct aggr_data ad = { .cpu = cpu }; + + if (!collect_data(config, counter, counter_cb, &ad)) + return; + val = ad.val; + ena = ad.ena; + run = ad.run; + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); + + fputc('\n', output); + } +} + +static void print_no_aggr_metric(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix) +{ + int cpu; + int nrcpus = 0; + struct perf_evsel *counter; + u64 ena, run, val; + double uval; + + nrcpus = evlist->cpus->nr; + for (cpu = 0; cpu < nrcpus; cpu++) { + bool first = true; + + if (prefix) + fputs(prefix, config->output); + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + if (first) { + aggr_printout(config, counter, cpu, 0); + first = false; + } + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; + + uval = val * counter->scale; + printout(config, cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); + } + fputc('\n', config->output); + } +} + +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_SOCKET] = 12, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 24, + [AGGR_GLOBAL] = 0, +}; + +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_SOCKET] = "socket,cpus", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_GLOBAL] = "" +}; + +static void print_metric_headers(struct perf_stat_config *config, + struct perf_evlist *evlist, + const char *prefix, bool no_indent) +{ + struct perf_stat_output_ctx out; + struct perf_evsel *counter; + struct outstate os = { + .fh = config->output + }; + + if (prefix) + fprintf(config->output, "%s", prefix); + + if (!config->csv_output && !no_indent) + fprintf(config->output, "%*s", + aggr_header_lens[config->aggr_mode], ""); + if (config->csv_output) { + if (config->interval) + fputs("time,", config->output); + fputs(aggr_header_csv[config->aggr_mode], config->output); + } + + /* Print metrics headers only */ + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + os.evsel = counter; + out.ctx = &os; + out.print_metric = print_metric_header; + out.new_line = new_line_metric; + out.force_header = true; + os.evsel = counter; + perf_stat__print_shadow_stats(config, counter, 0, + 0, + &out, + &config->metric_events, + &rt_stat); + } + fputc('\n', config->output); +} + +static void print_interval(struct perf_stat_config *config, + struct perf_evlist *evlist, + char *prefix, struct timespec *ts) +{ + bool metric_only = config->metric_only; + unsigned int unit_width = config->unit_width; + FILE *output = config->output; + static int num_print_interval; + + if (config->interval_clear) + puts(CONSOLE_CLEAR); + + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + + if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { + switch (config->aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU "); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time"); + if (!metric_only) + fprintf(output, " counts %*s events\n", unit_width, "unit"); + case AGGR_UNSET: + break; + } + } + + if ((num_print_interval == 0 || config->interval_clear) && metric_only) + print_metric_headers(config, evlist, " ", true); + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(struct perf_stat_config *config, + struct target *_target, + int argc, const char **argv) +{ + FILE *output = config->output; + int i; + + fflush(stdout); + + if (!config->csv_output) { + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (_target->system_wide) + fprintf(output, "\'system wide"); + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { + fprintf(output, "\'%s", argv ? argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) + fprintf(output, " %s", argv[i]); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); + else + fprintf(output, "thread id \'%s", _target->tid); + + fprintf(output, "\'"); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); + fprintf(output, ":\n\n"); + } +} + +static int get_precision(double num) +{ + if (num > 1) + return 0; + + return lround(ceil(-log10(num))); +} + +static void print_table(struct perf_stat_config *config, + FILE *output, int precision, double avg) +{ + char tmp[64]; + int idx, indent = 0; + + scnprintf(tmp, 64, " %17.*f", precision, avg); + while (tmp[indent] == ' ') + indent++; + + fprintf(output, "%*s# Table of individual measurements:\n", indent, ""); + + for (idx = 0; idx < config->run_count; idx++) { + double run = (double) config->walltime_run[idx] / NSEC_PER_SEC; + int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5); + + fprintf(output, " %17.*f (%+.*f) ", + precision, run, precision, run - avg); + + for (h = 0; h < n; h++) + fprintf(output, "#"); + + fprintf(output, "\n"); + } + + fprintf(output, "\n%*s# Final result:\n", indent, ""); +} + +static double timeval2double(struct timeval *t) +{ + return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC; +} + +static void print_footer(struct perf_stat_config *config) +{ + double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; + FILE *output = config->output; + int n; + + if (!config->null_run) + fprintf(output, "\n"); + + if (config->run_count == 1) { + fprintf(output, " %17.9f seconds time elapsed", avg); + + if (config->ru_display) { + double ru_utime = timeval2double(&config->ru_data.ru_utime); + double ru_stime = timeval2double(&config->ru_data.ru_stime); + + fprintf(output, "\n\n"); + fprintf(output, " %17.9f seconds user\n", ru_utime); + fprintf(output, " %17.9f seconds sys\n", ru_stime); + } + } else { + double sd = stddev_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; + /* + * Display at most 2 more significant + * digits than the stddev inaccuracy. + */ + int precision = get_precision(sd) + 2; + + if (config->walltime_run_table) + print_table(config, output, precision, avg); + + fprintf(output, " %17.*f +- %.*f seconds time elapsed", + precision, avg, precision, sd); + + print_noise_pct(config, sd, avg); + } + fprintf(output, "\n\n"); + + if (config->print_free_counters_hint && + sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 && + n > 0) + fprintf(output, +"Some events weren't counted. Try disabling the NMI watchdog:\n" +" echo 0 > /proc/sys/kernel/nmi_watchdog\n" +" perf stat ...\n" +" echo 1 > /proc/sys/kernel/nmi_watchdog\n"); + + if (config->print_mixed_hw_group_error) + fprintf(output, + "The events in group usually have to be from " + "the same PMU. Try reorganizing the group.\n"); +} + +void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, + struct target *_target, + struct timespec *ts, + int argc, const char **argv) +{ + bool metric_only = config->metric_only; + int interval = config->interval; + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(config, evlist, prefix = buf, ts); + else + print_header(config, _target, argc, argv); + + if (metric_only) { + static int num_print_iv; + + if (num_print_iv == 0 && !interval) + print_metric_headers(config, evlist, prefix, false); + if (num_print_iv++ == 25) + num_print_iv = 0; + if (config->aggr_mode == AGGR_GLOBAL && prefix) + fprintf(config->output, "%s", prefix); + } + + switch (config->aggr_mode) { + case AGGR_CORE: + case AGGR_SOCKET: + print_aggr(config, evlist, prefix); + break; + case AGGR_THREAD: + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_aggr_thread(config, _target, counter, prefix); + } + break; + case AGGR_GLOBAL: + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_counter_aggr(config, counter, prefix); + } + if (metric_only) + fputc('\n', config->output); + break; + case AGGR_NONE: + if (metric_only) + print_no_aggr_metric(config, evlist, prefix); + else { + evlist__for_each_entry(evlist, counter) { + if (is_duration_time(counter)) + continue; + print_counter(config, counter, prefix); + } + } + break; + case AGGR_UNSET: + default: + break; + } + + if (!interval && !config->csv_output) + print_footer(config); + + fflush(config->output); +} diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 594d14a02b67..8ad32763cfff 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -410,7 +410,8 @@ static double runtime_stat_n(struct runtime_stat *st, return v->stats.n; } -static void print_stalled_cycles_frontend(int cpu, +static void print_stalled_cycles_frontend(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st) @@ -427,13 +428,14 @@ static void print_stalled_cycles_frontend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); if (ratio) - out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle", + out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle", ratio); else - out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0); + out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0); } -static void print_stalled_cycles_backend(int cpu, +static void print_stalled_cycles_backend(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, struct runtime_stat *st) @@ -449,10 +451,11 @@ static void print_stalled_cycles_backend(int cpu, color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio); } -static void print_branch_misses(int cpu, +static void print_branch_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -469,10 +472,11 @@ static void print_branch_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio); } -static void print_l1_dcache_misses(int cpu, +static void print_l1_dcache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -490,10 +494,11 @@ static void print_l1_dcache_misses(int cpu, color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio); } -static void print_l1_icache_misses(int cpu, +static void print_l1_icache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -510,10 +515,11 @@ static void print_l1_icache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio); } -static void print_dtlb_cache_misses(int cpu, +static void print_dtlb_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -529,10 +535,11 @@ static void print_dtlb_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio); } -static void print_itlb_cache_misses(int cpu, +static void print_itlb_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -548,10 +555,11 @@ static void print_itlb_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio); } -static void print_ll_cache_misses(int cpu, +static void print_ll_cache_misses(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, double avg, struct perf_stat_output_ctx *out, @@ -567,7 +575,7 @@ static void print_ll_cache_misses(int cpu, ratio = avg / total * 100.0; color = get_ratio_color(GRC_CACHE_MISSES, ratio); - out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); + out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio); } /* @@ -674,7 +682,8 @@ static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) return sanitize_val(1.0 - sum); } -static void print_smi_cost(int cpu, struct perf_evsel *evsel, +static void print_smi_cost(struct perf_stat_config *config, + int cpu, struct perf_evsel *evsel, struct perf_stat_output_ctx *out, struct runtime_stat *st) { @@ -694,11 +703,12 @@ static void print_smi_cost(int cpu, struct perf_evsel *evsel, if (cost > 10) color = PERF_COLOR_RED; - out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost); - out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num); + out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost); + out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num); } -static void generic_metric(const char *metric_expr, +static void generic_metric(struct perf_stat_config *config, + const char *metric_expr, struct perf_evsel **metric_events, char *name, const char *metric_name, @@ -737,20 +747,21 @@ static void generic_metric(const char *metric_expr, const char *p = metric_expr; if (expr__parse(&ratio, &pctx, &p) == 0) - print_metric(ctxp, NULL, "%8.1f", + print_metric(config, ctxp, NULL, "%8.1f", metric_name ? metric_name : out->force_header ? name : "", ratio); else - print_metric(ctxp, NULL, NULL, + print_metric(config, ctxp, NULL, NULL, out->force_header ? (metric_name ? metric_name : name) : "", 0); } else - print_metric(ctxp, NULL, NULL, "", 0); + print_metric(config, ctxp, NULL, NULL, "", 0); } -void perf_stat__print_shadow_stats(struct perf_evsel *evsel, +void perf_stat__print_shadow_stats(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, @@ -769,10 +780,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (total) { ratio = avg / total; - print_metric(ctxp, NULL, "%7.2f ", + print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", ratio); } else { - print_metric(ctxp, NULL, NULL, "insn per cycle", 0); + print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); } total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, @@ -783,20 +794,20 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ctx, cpu)); if (total && avg) { - out->new_line(ctxp); + out->new_line(config, ctxp); ratio = total / avg; - print_metric(ctxp, NULL, "%7.2f ", + print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", ratio); } else if (have_frontend_stalled) { - print_metric(ctxp, NULL, NULL, + print_metric(config, ctxp, NULL, NULL, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) - print_branch_misses(cpu, evsel, avg, out, st); + print_branch_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all branches", 0); + print_metric(config, ctxp, NULL, NULL, "of all branches", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | @@ -804,9 +815,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) - print_l1_dcache_misses(cpu, evsel, avg, out, st); + print_l1_dcache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | @@ -814,9 +825,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) - print_l1_icache_misses(cpu, evsel, avg, out, st); + print_l1_icache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | @@ -824,9 +835,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out, st); + print_dtlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | @@ -834,9 +845,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) - print_itlb_cache_misses(cpu, evsel, avg, out, st); + print_itlb_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( evsel->attr.type == PERF_TYPE_HW_CACHE && evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | @@ -844,9 +855,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) - print_ll_cache_misses(cpu, evsel, avg, out, st); + print_ll_cache_misses(config, cpu, evsel, avg, out, st); else - print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); + print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); @@ -854,32 +865,32 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = avg * 100 / total; if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) - print_metric(ctxp, NULL, "%8.3f %%", + print_metric(config, ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else - print_metric(ctxp, NULL, NULL, "of all cache refs", 0); + print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out, st); + print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out, st); + print_stalled_cycles_backend(config, cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; - print_metric(ctxp, NULL, "%8.3f", "GHz", ratio); + print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio); } else { - print_metric(ctxp, NULL, NULL, "Ghz", 0); + print_metric(config, ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) - print_metric(ctxp, NULL, + print_metric(config, ctxp, NULL, "%7.2f%%", "transactional cycles", 100.0 * (avg / total)); else - print_metric(ctxp, NULL, NULL, "transactional cycles", + print_metric(config, ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); @@ -888,10 +899,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (total2 < avg) total2 = avg; if (total) - print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles", + print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles", 100.0 * ((total2-avg) / total)); else - print_metric(ctxp, NULL, NULL, "aborted cycles", 0); + print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); @@ -900,10 +911,10 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, ratio = total / avg; if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) - print_metric(ctxp, NULL, "%8.0f", + print_metric(config, ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else - print_metric(ctxp, NULL, NULL, "cycles / transaction", + print_metric(config, ctxp, NULL, NULL, "cycles / transaction", 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, @@ -912,34 +923,33 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (avg) ratio = total / avg; - print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio); - } else if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK) || - perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK)) { + print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio); + } else if (perf_evsel__is_clock(evsel)) { if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0) - print_metric(ctxp, NULL, "%8.3f", "CPUs utilized", - avg / ratio); + print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", + avg / (ratio * evsel->scale)); else - print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); + print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; - print_metric(ctxp, color, "%8.1f%%", "frontend bound", + print_metric(config, ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; - print_metric(ctxp, color, "%8.1f%%", "retiring", + print_metric(config, ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; - print_metric(ctxp, color, "%8.1f%%", "bad speculation", + print_metric(config, ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { double be_bound = td_be_bound(ctx, cpu, st); @@ -956,12 +966,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; if (td_total_slots(ctx, cpu, st) > 0) - print_metric(ctxp, color, "%8.1f%%", name, + print_metric(config, ctxp, color, "%8.1f%%", name, be_bound * 100.); else - print_metric(ctxp, NULL, NULL, name, 0); + print_metric(config, ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { - generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, + generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name, evsel->metric_name, avg, cpu, out, st); } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; @@ -976,9 +986,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, unit = 'K'; } snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); + print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out, st); + print_smi_cost(config, cpu, evsel, out, st); } else { num = 0; } @@ -988,12 +998,12 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, list_for_each_entry (mexp, &me->head, nd) { if (num++ > 0) - out->new_line(ctxp); - generic_metric(mexp->metric_expr, mexp->metric_events, + out->new_line(config, ctxp); + generic_metric(config, mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, avg, cpu, out, st); } } if (num == 0) - print_metric(ctxp, NULL, NULL, NULL, 0); + print_metric(config, ctxp, NULL, NULL, NULL, 0); } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index a0061e0b0fad..4d40515307b8 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -374,9 +374,8 @@ int perf_stat_process_counter(struct perf_stat_config *config, return 0; } -int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused, - union perf_event *event, - struct perf_session *session) +int perf_event__process_stat_event(struct perf_session *session, + union perf_event *event) { struct perf_counts_values count; struct stat_event *st = &event->stat; @@ -435,3 +434,98 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp) return ret; } + +int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config, + struct target *target) +{ + struct perf_event_attr *attr = &evsel->attr; + struct perf_evsel *leader = evsel->leader; + + if (config->scale) { + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + + /* + * The event is part of non trivial group, let's enable + * the group read (for leader) and ID retrieval for all + * members. + */ + if (leader->nr_members > 1) + attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP; + + attr->inherit = !config->no_inherit; + + /* + * Some events get initialized with sample_(period/type) set, + * like tracepoints. Clear it up for counting. + */ + attr->sample_period = 0; + + if (config->identifier) + attr->sample_type = PERF_SAMPLE_IDENTIFIER; + + /* + * Disabling all counters initially, they will be enabled + * either manually by us or by kernel via enable_on_exec + * set later. + */ + if (perf_evsel__is_group_leader(evsel)) { + attr->disabled = 1; + + /* + * In case of initial_delay we enable tracee + * events manually. + */ + if (target__none(target) && !config->initial_delay) + attr->enable_on_exec = 1; + } + + if (target__has_cpu(target) && !target__has_per_thread(target)) + return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); + + return perf_evsel__open_per_thread(evsel, evsel->threads); +} + +int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, + struct perf_evlist *evlist, + perf_event__handler_t process, + bool attrs) +{ + int err; + + if (attrs) { + err = perf_event__synthesize_attrs(tool, evlist, process); + if (err < 0) { + pr_err("Couldn't synthesize attrs.\n"); + return err; + } + } + + err = perf_event__synthesize_extra_attr(tool, evlist, process, + attrs); + + err = perf_event__synthesize_thread_map2(tool, evlist->threads, + process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(tool, evlist->cpus, + process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_stat_config(tool, config, process, NULL); + if (err < 0) { + pr_err("Couldn't synthesize config.\n"); + return err; + } + + return 0; +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 8f56ba4fd258..2f9c9159a364 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -4,11 +4,16 @@ #include <linux/types.h> #include <stdio.h> +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/wait.h> #include "xyarray.h" #include "rblist.h" +#include "perf.h" +#include "event.h" -struct stats -{ +struct stats { double n, mean, M2; u64 max, min; }; @@ -85,15 +90,42 @@ struct runtime_stat { struct rblist value_list; }; +typedef int (*aggr_get_id_t)(struct perf_stat_config *config, + struct cpu_map *m, int cpu); + struct perf_stat_config { - enum aggr_mode aggr_mode; - bool scale; - FILE *output; - unsigned int interval; - unsigned int timeout; - int times; - struct runtime_stat *stats; - int stats_num; + enum aggr_mode aggr_mode; + bool scale; + bool no_inherit; + bool identifier; + bool csv_output; + bool interval_clear; + bool metric_only; + bool null_run; + bool ru_display; + bool big_num; + bool no_merge; + bool walltime_run_table; + FILE *output; + unsigned int interval; + unsigned int timeout; + unsigned int initial_delay; + unsigned int unit_width; + unsigned int metric_only_len; + int times; + int run_count; + int print_free_counters_hint; + int print_mixed_hw_group_error; + struct runtime_stat *stats; + int stats_num; + const char *csv_sep; + struct stats *walltime_nsecs_stats; + struct rusage ru_data; + struct cpu_map *aggr_map; + aggr_get_id_t aggr_get_id; + struct cpu_map *cpus_aggr_map; + u64 *walltime_run; + struct rblist metric_events; }; void update_stats(struct stats *stats, u64 val); @@ -131,9 +163,10 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; -typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, +typedef void (*print_metric_t)(struct perf_stat_config *config, + void *ctx, const char *color, const char *unit, const char *fmt, double val); -typedef void (*new_line_t )(void *ctx); +typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); void runtime_stat__init(struct runtime_stat *st); void runtime_stat__exit(struct runtime_stat *st); @@ -149,7 +182,8 @@ struct perf_stat_output_ctx { bool force_header; }; -void perf_stat__print_shadow_stats(struct perf_evsel *evsel, +void perf_stat__print_shadow_stats(struct perf_stat_config *config, + struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, struct rblist *metric_events, @@ -165,11 +199,25 @@ int perf_stat_process_counter(struct perf_stat_config *config, struct perf_tool; union perf_event; struct perf_session; -int perf_event__process_stat_event(struct perf_tool *tool, - union perf_event *event, - struct perf_session *session); +int perf_event__process_stat_event(struct perf_session *session, + union perf_event *event); size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp); size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp); + +int create_perf_stat_counter(struct perf_evsel *evsel, + struct perf_stat_config *config, + struct target *target); +int perf_stat_synthesize_config(struct perf_stat_config *config, + struct perf_tool *tool, + struct perf_evlist *evlist, + perf_event__handler_t process, + bool attrs); +void +perf_evlist__print_counters(struct perf_evlist *evlist, + struct perf_stat_config *config, + struct target *_target, + struct timespec *ts, + int argc, const char **argv); #endif diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 3d1cf5bf7f18..9005fbe0780e 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -98,19 +98,25 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) va_copy(ap_saved, ap); len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - if (len < 0) + if (len < 0) { + va_end(ap_saved); return len; + } if (len > strbuf_avail(sb)) { ret = strbuf_grow(sb, len); - if (ret) + if (ret) { + va_end(ap_saved); return ret; + } len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); va_end(ap_saved); if (len > strbuf_avail(sb)) { pr_debug("this should not happen, your vsnprintf is broken"); + va_end(ap_saved); return -EINVAL; } } + va_end(ap_saved); return strbuf_setlen(sb, sb->len + len); } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 2de770511e70..66a84d5846c8 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -114,16 +114,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym) sym->st_shndx != SHN_ABS; } -static bool elf_sym__is_a(GElf_Sym *sym, enum map_type type) +static bool elf_sym__filter(GElf_Sym *sym) { - switch (type) { - case MAP__FUNCTION: - return elf_sym__is_function(sym); - case MAP__VARIABLE: - return elf_sym__is_object(sym); - default: - return false; - } + return elf_sym__is_function(sym) || elf_sym__is_object(sym); } static inline const char *elf_sym__name(const GElf_Sym *sym, @@ -150,17 +143,10 @@ static inline bool elf_sec__is_data(const GElf_Shdr *shdr, return strstr(elf_sec__name(shdr, secstrs), "data") != NULL; } -static bool elf_sec__is_a(GElf_Shdr *shdr, Elf_Data *secstrs, - enum map_type type) +static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs) { - switch (type) { - case MAP__FUNCTION: - return elf_sec__is_text(shdr, secstrs); - case MAP__VARIABLE: - return elf_sec__is_data(shdr, secstrs); - default: - return false; - } + return elf_sec__is_text(shdr, secstrs) || + elf_sec__is_data(shdr, secstrs); } static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) @@ -256,7 +242,7 @@ static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) * And always look at the original dso, not at debuginfo packages, that * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS). */ -int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map *map) +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) { uint32_t nr_rel_entries, idx; GElf_Sym sym; @@ -338,7 +324,17 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * plt_entry_size = 16; break; - default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + case EM_SPARC: + plt_header_size = 48; + plt_entry_size = 12; + break; + + case EM_SPARCV9: + plt_header_size = 128; + plt_entry_size = 32; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/xtensa need to be checked */ plt_header_size = shdr_plt.sh_entsize; plt_entry_size = shdr_plt.sh_entsize; break; @@ -364,12 +360,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * free(demangled); f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, sympltname); + STB_GLOBAL, STT_FUNC, sympltname); if (!f) goto out_elf_end; plt_offset += plt_entry_size; - symbols__insert(&dso->symbols[map->type], f); + symbols__insert(&dso->symbols, f); ++nr; } } else if (shdr_rel_plt.sh_type == SHT_REL) { @@ -390,12 +386,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * free(demangled); f = symbol__new(plt_offset, plt_entry_size, - STB_GLOBAL, sympltname); + STB_GLOBAL, STT_FUNC, sympltname); if (!f) goto out_elf_end; plt_offset += plt_entry_size; - symbols__insert(&dso->symbols[map->type], f); + symbols__insert(&dso->symbols, f); ++nr; } } @@ -811,6 +807,110 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +static int dso__process_kernel_symbol(struct dso *dso, struct map *map, + GElf_Sym *sym, GElf_Shdr *shdr, + struct map_groups *kmaps, struct kmap *kmap, + struct dso **curr_dsop, struct map **curr_mapp, + const char *section_name, + bool adjust_kernel_syms, bool kmodule, bool *remap_kernel) +{ + struct dso *curr_dso = *curr_dsop; + struct map *curr_map; + char dso_name[PATH_MAX]; + + /* Adjust symbol to map to file offset */ + if (adjust_kernel_syms) + sym->st_value -= shdr->sh_addr - shdr->sh_offset; + + if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0) + return 0; + + if (strcmp(section_name, ".text") == 0) { + /* + * The initial kernel mapping is based on + * kallsyms and identity maps. Overwrite it to + * map to the kernel dso. + */ + if (*remap_kernel && dso->kernel) { + *remap_kernel = false; + map->start = shdr->sh_addr + ref_reloc(kmap); + map->end = map->start + shdr->sh_size; + map->pgoff = shdr->sh_offset; + map->map_ip = map__map_ip; + map->unmap_ip = map__unmap_ip; + /* Ensure maps are correctly ordered */ + if (kmaps) { + map__get(map); + map_groups__remove(kmaps, map); + map_groups__insert(kmaps, map); + map__put(map); + } + } + + /* + * The initial module mapping is based on + * /proc/modules mapped to offset zero. + * Overwrite it to map to the module dso. + */ + if (*remap_kernel && kmodule) { + *remap_kernel = false; + map->pgoff = shdr->sh_offset; + } + + *curr_mapp = map; + *curr_dsop = dso; + return 0; + } + + if (!kmap) + return 0; + + snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); + + curr_map = map_groups__find_by_name(kmaps, dso_name); + if (curr_map == NULL) { + u64 start = sym->st_value; + + if (kmodule) + start += map->start + shdr->sh_offset; + + curr_dso = dso__new(dso_name); + if (curr_dso == NULL) + return -1; + curr_dso->kernel = dso->kernel; + curr_dso->long_name = dso->long_name; + curr_dso->long_name_len = dso->long_name_len; + curr_map = map__new2(start, curr_dso); + dso__put(curr_dso); + if (curr_map == NULL) + return -1; + + if (adjust_kernel_syms) { + curr_map->start = shdr->sh_addr + ref_reloc(kmap); + curr_map->end = curr_map->start + shdr->sh_size; + curr_map->pgoff = shdr->sh_offset; + } else { + curr_map->map_ip = curr_map->unmap_ip = identity__map_ip; + } + curr_dso->symtab_type = dso->symtab_type; + map_groups__insert(kmaps, curr_map); + /* + * Add it before we drop the referece to curr_map, i.e. while + * we still are sure to have a reference to this DSO via + * *curr_map->dso. + */ + dsos__add(&map->groups->machine->dsos, curr_dso); + /* kmaps already got it */ + map__put(curr_map); + dso__set_loaded(curr_dso); + *curr_mapp = curr_map; + *curr_dsop = curr_dso; + } else + *curr_dsop = curr_map->dso; + + return 0; +} + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule) { @@ -844,7 +944,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, * have the wrong values for the dso maps, so remove them. */ if (kmodule && syms_ss->symtab) - symbols__delete(&dso->symbols[map->type]); + symbols__delete(&dso->symbols); if (!syms_ss->symtab) { /* @@ -921,10 +1021,10 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); /* - * Initial kernel and module mappings do not map to the dso. For - * function mappings, flag the fixups. + * Initial kernel and module mappings do not map to the dso. + * Flag the fixups. */ - if (map->type == MAP__FUNCTION && (dso->kernel || kmodule)) { + if (dso->kernel || kmodule) { remap_kernel = true; adjust_kernel_syms = dso->adjust_symbols; } @@ -936,7 +1036,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, const char *section_name; bool used_opd = false; - if (!is_label && !elf_sym__is_a(&sym, map->type)) + if (!is_label && !elf_sym__filter(&sym)) continue; /* Reject ARM ELF "mapping symbols": these aren't unique and @@ -974,7 +1074,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, gelf_getshdr(sec, &shdr); - if (is_label && !elf_sec__is_a(&shdr, secstrs, map->type)) + if (is_label && !elf_sec__filter(&shdr, secstrs)) continue; section_name = elf_sec__name(&shdr, secstrs); @@ -982,134 +1082,37 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, /* On ARM, symbols for thumb functions have 1 added to * the symbol address as a flag - remove it */ if ((ehdr.e_machine == EM_ARM) && - (map->type == MAP__FUNCTION) && + (GELF_ST_TYPE(sym.st_info) == STT_FUNC) && (sym.st_value & 1)) --sym.st_value; if (dso->kernel || kmodule) { - char dso_name[PATH_MAX]; - - /* Adjust symbol to map to file offset */ - if (adjust_kernel_syms) - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - - if (strcmp(section_name, - (curr_dso->short_name + - dso->short_name_len)) == 0) - goto new_symbol; - - if (strcmp(section_name, ".text") == 0) { - /* - * The initial kernel mapping is based on - * kallsyms and identity maps. Overwrite it to - * map to the kernel dso. - */ - if (remap_kernel && dso->kernel) { - remap_kernel = false; - map->start = shdr.sh_addr + - ref_reloc(kmap); - map->end = map->start + shdr.sh_size; - map->pgoff = shdr.sh_offset; - map->map_ip = map__map_ip; - map->unmap_ip = map__unmap_ip; - /* Ensure maps are correctly ordered */ - if (kmaps) { - map__get(map); - map_groups__remove(kmaps, map); - map_groups__insert(kmaps, map); - map__put(map); - } - } - - /* - * The initial module mapping is based on - * /proc/modules mapped to offset zero. - * Overwrite it to map to the module dso. - */ - if (remap_kernel && kmodule) { - remap_kernel = false; - map->pgoff = shdr.sh_offset; - } - - curr_map = map; - curr_dso = dso; - goto new_symbol; - } - - if (!kmap) - goto new_symbol; - - snprintf(dso_name, sizeof(dso_name), - "%s%s", dso->short_name, section_name); - - curr_map = map_groups__find_by_name(kmaps, map->type, dso_name); - if (curr_map == NULL) { - u64 start = sym.st_value; - - if (kmodule) - start += map->start + shdr.sh_offset; - - curr_dso = dso__new(dso_name); - if (curr_dso == NULL) - goto out_elf_end; - curr_dso->kernel = dso->kernel; - curr_dso->long_name = dso->long_name; - curr_dso->long_name_len = dso->long_name_len; - curr_map = map__new2(start, curr_dso, - map->type); - dso__put(curr_dso); - if (curr_map == NULL) { - goto out_elf_end; - } - if (adjust_kernel_syms) { - curr_map->start = shdr.sh_addr + - ref_reloc(kmap); - curr_map->end = curr_map->start + - shdr.sh_size; - curr_map->pgoff = shdr.sh_offset; - } else { - curr_map->map_ip = identity__map_ip; - curr_map->unmap_ip = identity__map_ip; - } - curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmaps, curr_map); - /* - * Add it before we drop the referece to curr_map, - * i.e. while we still are sure to have a reference - * to this DSO via curr_map->dso. - */ - dsos__add(&map->groups->machine->dsos, curr_dso); - /* kmaps already got it */ - map__put(curr_map); - dso__set_loaded(curr_dso, map->type); - } else - curr_dso = curr_map->dso; - - goto new_symbol; - } - - if ((used_opd && runtime_ss->adjust_symbols) - || (!used_opd && syms_ss->adjust_symbols)) { + if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map, + section_name, adjust_kernel_syms, kmodule, &remap_kernel)) + goto out_elf_end; + } else if ((used_opd && runtime_ss->adjust_symbols) || + (!used_opd && syms_ss->adjust_symbols)) { pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__, (u64)sym.st_value, (u64)shdr.sh_addr, (u64)shdr.sh_offset); sym.st_value -= shdr.sh_addr - shdr.sh_offset; } -new_symbol: + demangled = demangle_sym(dso, kmodule, elf_name); if (demangled != NULL) elf_name = demangled; f = symbol__new(sym.st_value, sym.st_size, - GELF_ST_BIND(sym.st_info), elf_name); + GELF_ST_BIND(sym.st_info), + GELF_ST_TYPE(sym.st_info), elf_name); free(demangled); if (!f) goto out_elf_end; arch__sym_update(f, &sym); - __symbols__insert(&curr_dso->symbols[curr_map->type], f, dso->kernel); + __symbols__insert(&curr_dso->symbols, f, dso->kernel); nr++; } @@ -1117,14 +1120,14 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_end(&dso->symbols[map->type]); - symbols__fixup_duplicate(&dso->symbols[map->type]); + symbols__fixup_end(&dso->symbols); + symbols__fixup_duplicate(&dso->symbols); if (kmap) { /* * We need to fixup this here too because we create new * maps here, for things like vsyscall sections. */ - __map_groups__fixup_end(kmaps, map->type); + map_groups__fixup_end(kmaps); } } err = nr; @@ -1393,8 +1396,16 @@ static off_t kcore__write(struct kcore *kcore) struct phdr_data { off_t offset; + off_t rel; u64 addr; u64 len; + struct list_head node; + struct phdr_data *remaps; +}; + +struct sym_data { + u64 addr; + struct list_head node; }; struct kcore_copy_info { @@ -1404,16 +1415,78 @@ struct kcore_copy_info { u64 last_symbol; u64 first_module; u64 last_module_symbol; - struct phdr_data kernel_map; - struct phdr_data modules_map; + size_t phnum; + struct list_head phdrs; + struct list_head syms; }; +#define kcore_copy__for_each_phdr(k, p) \ + list_for_each_entry((p), &(k)->phdrs, node) + +static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset) +{ + struct phdr_data *p = zalloc(sizeof(*p)); + + if (p) { + p->addr = addr; + p->len = len; + p->offset = offset; + } + + return p; +} + +static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci, + u64 addr, u64 len, + off_t offset) +{ + struct phdr_data *p = phdr_data__new(addr, len, offset); + + if (p) + list_add_tail(&p->node, &kci->phdrs); + + return p; +} + +static void kcore_copy__free_phdrs(struct kcore_copy_info *kci) +{ + struct phdr_data *p, *tmp; + + list_for_each_entry_safe(p, tmp, &kci->phdrs, node) { + list_del(&p->node); + free(p); + } +} + +static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci, + u64 addr) +{ + struct sym_data *s = zalloc(sizeof(*s)); + + if (s) { + s->addr = addr; + list_add_tail(&s->node, &kci->syms); + } + + return s; +} + +static void kcore_copy__free_syms(struct kcore_copy_info *kci) +{ + struct sym_data *s, *tmp; + + list_for_each_entry_safe(s, tmp, &kci->syms, node) { + list_del(&s->node); + free(s); + } +} + static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, u64 start) { struct kcore_copy_info *kci = arg; - if (!symbol_type__is_a(type, MAP__FUNCTION)) + if (!kallsyms__is_function(type)) return 0; if (strchr(name, '[')) { @@ -1438,6 +1511,9 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type, return 0; } + if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start)) + return -1; + return 0; } @@ -1487,27 +1563,39 @@ static int kcore_copy__parse_modules(struct kcore_copy_info *kci, return 0; } -static void kcore_copy__map(struct phdr_data *p, u64 start, u64 end, u64 pgoff, - u64 s, u64 e) +static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end, + u64 pgoff, u64 s, u64 e) { - if (p->addr || s < start || s >= end) - return; + u64 len, offset; - p->addr = s; - p->offset = (s - start) + pgoff; - p->len = e < end ? e - s : end - s; + if (s < start || s >= end) + return 0; + + offset = (s - start) + pgoff; + len = e < end ? e - s : end - s; + + return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1; } static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data) { struct kcore_copy_info *kci = data; u64 end = start + len; + struct sym_data *sdat; + + if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext)) + return -1; - kcore_copy__map(&kci->kernel_map, start, end, pgoff, kci->stext, - kci->etext); + if (kcore_copy__map(kci, start, end, pgoff, kci->first_module, + kci->last_module_symbol)) + return -1; - kcore_copy__map(&kci->modules_map, start, end, pgoff, kci->first_module, - kci->last_module_symbol); + list_for_each_entry(sdat, &kci->syms, node) { + u64 s = round_down(sdat->addr, page_size); + + if (kcore_copy__map(kci, start, end, pgoff, s, s + len)) + return -1; + } return 0; } @@ -1520,6 +1608,64 @@ static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf) return 0; } +static void kcore_copy__find_remaps(struct kcore_copy_info *kci) +{ + struct phdr_data *p, *k = NULL; + u64 kend; + + if (!kci->stext) + return; + + /* Find phdr that corresponds to the kernel map (contains stext) */ + kcore_copy__for_each_phdr(kci, p) { + u64 pend = p->addr + p->len - 1; + + if (p->addr <= kci->stext && pend >= kci->stext) { + k = p; + break; + } + } + + if (!k) + return; + + kend = k->offset + k->len; + + /* Find phdrs that remap the kernel */ + kcore_copy__for_each_phdr(kci, p) { + u64 pend = p->offset + p->len; + + if (p == k) + continue; + + if (p->offset >= k->offset && pend <= kend) + p->remaps = k; + } +} + +static void kcore_copy__layout(struct kcore_copy_info *kci) +{ + struct phdr_data *p; + off_t rel = 0; + + kcore_copy__find_remaps(kci); + + kcore_copy__for_each_phdr(kci, p) { + if (!p->remaps) { + p->rel = rel; + rel += p->len; + } + kci->phnum += 1; + } + + kcore_copy__for_each_phdr(kci, p) { + struct phdr_data *k = p->remaps; + + if (k) + p->rel = p->offset - k->offset + k->rel; + } +} + static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, Elf *elf) { @@ -1555,7 +1701,12 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir, if (kci->first_module && !kci->last_module_symbol) return -1; - return kcore_copy__read_maps(kci, elf); + if (kcore_copy__read_maps(kci, elf)) + return -1; + + kcore_copy__layout(kci); + + return 0; } static int kcore_copy__copy_file(const char *from_dir, const char *to_dir, @@ -1678,12 +1829,15 @@ int kcore_copy(const char *from_dir, const char *to_dir) { struct kcore kcore; struct kcore extract; - size_t count = 2; int idx = 0, err = -1; - off_t offset = page_size, sz, modules_offset = 0; + off_t offset, sz; struct kcore_copy_info kci = { .stext = 0, }; char kcore_filename[PATH_MAX]; char extract_filename[PATH_MAX]; + struct phdr_data *p; + + INIT_LIST_HEAD(&kci.phdrs); + INIT_LIST_HEAD(&kci.syms); if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms")) return -1; @@ -1703,20 +1857,17 @@ int kcore_copy(const char *from_dir, const char *to_dir) if (kcore__init(&extract, extract_filename, kcore.elfclass, false)) goto out_kcore_close; - if (!kci.modules_map.addr) - count -= 1; - - if (kcore__copy_hdr(&kcore, &extract, count)) + if (kcore__copy_hdr(&kcore, &extract, kci.phnum)) goto out_extract_close; - if (kcore__add_phdr(&extract, idx++, offset, kci.kernel_map.addr, - kci.kernel_map.len)) - goto out_extract_close; + offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) + + gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT); + offset = round_up(offset, page_size); + + kcore_copy__for_each_phdr(&kci, p) { + off_t offs = p->rel + offset; - if (kci.modules_map.addr) { - modules_offset = offset + kci.kernel_map.len; - if (kcore__add_phdr(&extract, idx, modules_offset, - kci.modules_map.addr, kci.modules_map.len)) + if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len)) goto out_extract_close; } @@ -1724,14 +1875,14 @@ int kcore_copy(const char *from_dir, const char *to_dir) if (sz < 0 || sz > offset) goto out_extract_close; - if (copy_bytes(kcore.fd, kci.kernel_map.offset, extract.fd, offset, - kci.kernel_map.len)) - goto out_extract_close; + kcore_copy__for_each_phdr(&kci, p) { + off_t offs = p->rel + offset; - if (modules_offset && copy_bytes(kcore.fd, kci.modules_map.offset, - extract.fd, modules_offset, - kci.modules_map.len)) - goto out_extract_close; + if (p->remaps) + continue; + if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len)) + goto out_extract_close; + } if (kcore_copy__compare_file(from_dir, to_dir, "modules")) goto out_extract_close; @@ -1754,6 +1905,9 @@ out_unlink_kallsyms: if (err) kcore_copy__unlink(to_dir, "kallsyms"); + kcore_copy__free_phdrs(&kci); + kcore_copy__free_syms(&kci); + return err; } @@ -1803,6 +1957,34 @@ void kcore_extract__delete(struct kcore_extract *kce) } #ifdef HAVE_GELF_GETNOTE_SUPPORT + +static void sdt_adjust_loc(struct sdt_note *tmp, GElf_Addr base_off) +{ + if (!base_off) + return; + + if (tmp->bit32) + tmp->addr.a32[SDT_NOTE_IDX_LOC] = + tmp->addr.a32[SDT_NOTE_IDX_LOC] + base_off - + tmp->addr.a32[SDT_NOTE_IDX_BASE]; + else + tmp->addr.a64[SDT_NOTE_IDX_LOC] = + tmp->addr.a64[SDT_NOTE_IDX_LOC] + base_off - + tmp->addr.a64[SDT_NOTE_IDX_BASE]; +} + +static void sdt_adjust_refctr(struct sdt_note *tmp, GElf_Addr base_addr, + GElf_Addr base_off) +{ + if (!base_off) + return; + + if (tmp->bit32 && tmp->addr.a32[SDT_NOTE_IDX_REFCTR]) + tmp->addr.a32[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off); + else if (tmp->addr.a64[SDT_NOTE_IDX_REFCTR]) + tmp->addr.a64[SDT_NOTE_IDX_REFCTR] -= (base_addr - base_off); +} + /** * populate_sdt_note : Parse raw data and identify SDT note * @elf: elf of the opened file @@ -1820,7 +2002,6 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, const char *provider, *name, *args; struct sdt_note *tmp = NULL; GElf_Ehdr ehdr; - GElf_Addr base_off = 0; GElf_Shdr shdr; int ret = -EINVAL; @@ -1916,17 +2097,12 @@ static int populate_sdt_note(Elf **elf, const char *data, size_t len, * base address in the description of the SDT note. If its different, * then accordingly, adjust the note location. */ - if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) { - base_off = shdr.sh_offset; - if (base_off) { - if (tmp->bit32) - tmp->addr.a32[0] = tmp->addr.a32[0] + base_off - - tmp->addr.a32[1]; - else - tmp->addr.a64[0] = tmp->addr.a64[0] + base_off - - tmp->addr.a64[1]; - } - } + if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) + sdt_adjust_loc(tmp, shdr.sh_offset); + + /* Adjust reference counter offset */ + if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_PROBES_SCN, NULL)) + sdt_adjust_refctr(tmp, shdr.sh_addr, shdr.sh_offset); list_add_tail(&tmp->note_list, sdt_notes); return 0; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index ff48d0d49584..7119df77dc0b 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -288,8 +288,7 @@ void symsrc__destroy(struct symsrc *ss) } int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused, - struct symsrc *ss __maybe_unused, - struct map *map __maybe_unused) + struct symsrc *ss __maybe_unused) { return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1466814ebada..d188b7588152 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <string.h> #include <linux/kernel.h> +#include <linux/mman.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/param.h> @@ -39,7 +40,6 @@ char **vmlinux_path; struct symbol_conf symbol_conf = { .use_modules = true, .try_vmlinux_path = true, - .annotate_src = true, .demangle = true, .demangle_kernel = false, .cumulate_callchain = true, @@ -70,18 +70,10 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -bool symbol_type__is_a(char symbol_type, enum map_type map_type) +static bool symbol_type__filter(char symbol_type) { symbol_type = toupper(symbol_type); - - switch (map_type) { - case MAP__FUNCTION: - return symbol_type == 'T' || symbol_type == 'W'; - case MAP__VARIABLE: - return symbol_type == 'D'; - default: - return false; - } + return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B'; } static int prefix_underscores_count(const char *str) @@ -228,9 +220,9 @@ void symbols__fixup_end(struct rb_root *symbols) curr->end = roundup(curr->start, 4096) + 4096; } -void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) +void map_groups__fixup_end(struct map_groups *mg) { - struct maps *maps = &mg->maps[type]; + struct maps *maps = &mg->maps; struct map *next, *curr; down_write(&maps->lock); @@ -256,7 +248,7 @@ out_unlock: up_write(&maps->lock); } -struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) +struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name) { size_t namelen = strlen(name) + 1; struct symbol *sym = calloc(1, (symbol_conf.priv_size + @@ -274,6 +266,7 @@ struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) sym->start = start; sym->end = len ? start + len : start; + sym->type = type; sym->binding = binding; sym->namelen = namelen - 1; @@ -484,45 +477,40 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, void dso__reset_find_symbol_cache(struct dso *dso) { - enum map_type type; - - for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { - dso->last_find_result[type].addr = 0; - dso->last_find_result[type].symbol = NULL; - } + dso->last_find_result.addr = 0; + dso->last_find_result.symbol = NULL; } -void dso__insert_symbol(struct dso *dso, enum map_type type, struct symbol *sym) +void dso__insert_symbol(struct dso *dso, struct symbol *sym) { - __symbols__insert(&dso->symbols[type], sym, dso->kernel); + __symbols__insert(&dso->symbols, sym, dso->kernel); /* update the symbol cache if necessary */ - if (dso->last_find_result[type].addr >= sym->start && - (dso->last_find_result[type].addr < sym->end || + if (dso->last_find_result.addr >= sym->start && + (dso->last_find_result.addr < sym->end || sym->start == sym->end)) { - dso->last_find_result[type].symbol = sym; + dso->last_find_result.symbol = sym; } } -struct symbol *dso__find_symbol(struct dso *dso, - enum map_type type, u64 addr) +struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { - if (dso->last_find_result[type].addr != addr || dso->last_find_result[type].symbol == NULL) { - dso->last_find_result[type].addr = addr; - dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); + if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { + dso->last_find_result.addr = addr; + dso->last_find_result.symbol = symbols__find(&dso->symbols, addr); } - return dso->last_find_result[type].symbol; + return dso->last_find_result.symbol; } -struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) +struct symbol *dso__first_symbol(struct dso *dso) { - return symbols__first(&dso->symbols[type]); + return symbols__first(&dso->symbols); } -struct symbol *dso__last_symbol(struct dso *dso, enum map_type type) +struct symbol *dso__last_symbol(struct dso *dso) { - return symbols__last(&dso->symbols[type]); + return symbols__last(&dso->symbols); } struct symbol *dso__next_symbol(struct symbol *sym) @@ -539,24 +527,22 @@ struct symbol *symbol__next_by_name(struct symbol *sym) } /* - * Teturns first symbol that matched with @name. + * Returns first symbol that matched with @name. */ -struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, - const char *name) +struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name) { - struct symbol *s = symbols__find_by_name(&dso->symbol_names[type], name, + struct symbol *s = symbols__find_by_name(&dso->symbol_names, name, SYMBOL_TAG_INCLUDE__NONE); if (!s) - s = symbols__find_by_name(&dso->symbol_names[type], name, + s = symbols__find_by_name(&dso->symbol_names, name, SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); return s; } -void dso__sort_by_name(struct dso *dso, enum map_type type) +void dso__sort_by_name(struct dso *dso) { - dso__set_sorted_by_name(dso, type); - return symbols__sort_by_name(&dso->symbol_names[type], - &dso->symbols[type]); + dso__set_sorted_by_name(dso); + return symbols__sort_by_name(&dso->symbol_names, &dso->symbols); } int modules__parse(const char *filename, void *arg, @@ -621,11 +607,6 @@ out: return err; } -struct process_kallsyms_args { - struct map *map; - struct dso *dso; -}; - /* * These are symbols in the kernel image, so make sure that * sym is from a kernel DSO. @@ -661,10 +642,10 @@ static int map__process_kallsym_symbol(void *arg, const char *name, char type, u64 start) { struct symbol *sym; - struct process_kallsyms_args *a = arg; - struct rb_root *root = &a->dso->symbols[a->map->type]; + struct dso *dso = arg; + struct rb_root *root = &dso->symbols; - if (!symbol_type__is_a(type, a->map->type)) + if (!symbol_type__filter(type)) return 0; /* @@ -672,7 +653,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * symbols, setting length to 0, and rely on * symbols__fixup_end() to fix it up. */ - sym = symbol__new(start, 0, kallsyms2elf_binding(type), name); + sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name); if (sym == NULL) return -ENOMEM; /* @@ -689,21 +670,18 @@ static int map__process_kallsym_symbol(void *arg, const char *name, * so that we can in the next step set the symbol ->end address and then * call kernel_maps__split_kallsyms. */ -static int dso__load_all_kallsyms(struct dso *dso, const char *filename, - struct map *map) +static int dso__load_all_kallsyms(struct dso *dso, const char *filename) { - struct process_kallsyms_args args = { .map = map, .dso = dso, }; - return kallsyms__parse(filename, &args, map__process_kallsym_symbol); + return kallsyms__parse(filename, dso, map__process_kallsym_symbol); } -static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) +static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso) { - struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; int count = 0; - struct rb_root old_root = dso->symbols[map->type]; - struct rb_root *root = &dso->symbols[map->type]; + struct rb_root old_root = dso->symbols; + struct rb_root *root = &dso->symbols; struct rb_node *next = rb_first(root); if (!kmaps) @@ -723,7 +701,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) if (module) *module = '\0'; - curr_map = map_groups__find(kmaps, map->type, pos->start); + curr_map = map_groups__find(kmaps, pos->start); if (!curr_map) { symbol__delete(pos); @@ -733,7 +711,7 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) pos->start -= curr_map->start - curr_map->pgoff; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; - symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + symbols__insert(&curr_map->dso->symbols, pos); ++count; } @@ -748,22 +726,25 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map) * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) +static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta, + struct map *initial_map) { - struct map_groups *kmaps = map__kmaps(map); struct machine *machine; - struct map *curr_map = map; + struct map *curr_map = initial_map; struct symbol *pos; int count = 0, moved = 0; - struct rb_root *root = &dso->symbols[map->type]; + struct rb_root *root = &dso->symbols; struct rb_node *next = rb_first(root); int kernel_range = 0; + bool x86_64; if (!kmaps) return -1; machine = kmaps->machine; + x86_64 = machine__is(machine, "x86_64"); + while (next) { char *module; @@ -778,7 +759,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) *module++ = '\0'; if (strcmp(curr_map->dso->short_name, module)) { - if (curr_map != map && + if (curr_map != initial_map && dso->kernel == DSO_TYPE_GUEST_KERNEL && machine__is_default_guest(machine)) { /* @@ -788,18 +769,16 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) * symbols are in its kmap. Mark it as * loaded. */ - dso__set_loaded(curr_map->dso, - curr_map->type); + dso__set_loaded(curr_map->dso); } - curr_map = map_groups__find_by_name(kmaps, - map->type, module); + curr_map = map_groups__find_by_name(kmaps, module); if (curr_map == NULL) { pr_debug("%s/proc/{kallsyms,modules} " "inconsistency while looking " "for \"%s\" module!\n", machine->root_dir, module); - curr_map = map; + curr_map = initial_map; goto discard_symbol; } @@ -809,11 +788,21 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) } /* * So that we look just like we get from .ko files, - * i.e. not prelinked, relative to map->start. + * i.e. not prelinked, relative to initial_map->start. */ pos->start = curr_map->map_ip(curr_map, pos->start); pos->end = curr_map->map_ip(curr_map, pos->end); - } else if (curr_map != map) { + } else if (x86_64 && is_entry_trampoline(pos->name)) { + /* + * These symbols are not needed anymore since the + * trampoline maps refer to the text section and it's + * symbols instead. Avoid having to deal with + * relocations, and the assumption that the first symbol + * is the start of kernel text, by simply removing the + * symbols at this point. + */ + goto discard_symbol; + } else if (curr_map != initial_map) { char dso_name[PATH_MAX]; struct dso *ndso; @@ -824,7 +813,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) } if (count == 0) { - curr_map = map; + curr_map = initial_map; goto add_symbol; } @@ -843,7 +832,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) ndso->kernel = dso->kernel; - curr_map = map__new2(pos->start, ndso, map->type); + curr_map = map__new2(pos->start, ndso); if (curr_map == NULL) { dso__put(ndso); return -1; @@ -858,9 +847,9 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta) pos->end -= delta; } add_symbol: - if (curr_map != map) { + if (curr_map != initial_map) { rb_erase(&pos->rb_node, root); - symbols__insert(&curr_map->dso->symbols[curr_map->type], pos); + symbols__insert(&curr_map->dso->symbols, pos); ++moved; } else ++count; @@ -871,10 +860,10 @@ discard_symbol: symbol__delete(pos); } - if (curr_map != map && + if (curr_map != initial_map && dso->kernel == DSO_TYPE_GUEST_KERNEL && machine__is_default_guest(kmaps->machine)) { - dso__set_loaded(curr_map->dso, curr_map->type); + dso__set_loaded(curr_map->dso); } return count + moved; @@ -1035,7 +1024,12 @@ out_delete_from: return ret; } -static int do_validate_kcore_modules(const char *filename, struct map *map, +struct map *map_groups__first(struct map_groups *mg) +{ + return maps__first(&mg->maps); +} + +static int do_validate_kcore_modules(const char *filename, struct map_groups *kmaps) { struct rb_root modules = RB_ROOT; @@ -1046,13 +1040,12 @@ static int do_validate_kcore_modules(const char *filename, struct map *map, if (err) return err; - old_map = map_groups__first(kmaps, map->type); + old_map = map_groups__first(kmaps); while (old_map) { struct map *next = map_groups__next(old_map); struct module_info *mi; - if (old_map == map || old_map->start == map->start) { - /* The kernel map */ + if (!__map__is_kmodule(old_map)) { old_map = next; continue; } @@ -1109,7 +1102,7 @@ static int validate_kcore_modules(const char *kallsyms_filename, kallsyms_filename)) return -EINVAL; - if (do_validate_kcore_modules(modules_filename, map, kmaps)) + if (do_validate_kcore_modules(modules_filename, kmaps)) return -EINVAL; return 0; @@ -1138,7 +1131,6 @@ static int validate_kcore_addresses(const char *kallsyms_filename, struct kcore_mapfn_data { struct dso *dso; - enum map_type type; struct list_head maps; }; @@ -1147,7 +1139,7 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) struct kcore_mapfn_data *md = data; struct map *map; - map = map__new2(start, md->dso, md->type); + map = map__new2(start, md->dso); if (map == NULL) return -ENOMEM; @@ -1163,13 +1155,13 @@ static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { struct map_groups *kmaps = map__kmaps(map); - struct machine *machine; struct kcore_mapfn_data md; struct map *old_map, *new_map, *replacement_map = NULL; + struct machine *machine; bool is_64_bit; int err, fd; char kcore_filename[PATH_MAX]; - struct symbol *sym; + u64 stext; if (!kmaps) return -EINVAL; @@ -1177,7 +1169,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, machine = kmaps->machine; /* This function requires that the map is the kernel map */ - if (map != machine->vmlinux_maps[map->type]) + if (!__map__is_kernel(map)) return -EINVAL; if (!filename_from_kallsyms_filename(kcore_filename, "kcore", @@ -1189,7 +1181,6 @@ static int dso__load_kcore(struct dso *dso, struct map *map, return -EINVAL; md.dso = dso; - md.type = map->type; INIT_LIST_HEAD(&md.maps); fd = open(kcore_filename, O_RDONLY); @@ -1200,7 +1191,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Read new maps into temporary lists */ - err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, + err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md, &is_64_bit); if (err) goto out_err; @@ -1212,7 +1203,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, } /* Remove old maps */ - old_map = map_groups__first(kmaps, map->type); + old_map = map_groups__first(kmaps); while (old_map) { struct map *next = map_groups__next(old_map); @@ -1220,14 +1211,15 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map_groups__remove(kmaps, old_map); old_map = next; } + machine->trampolines_mapped = false; - /* Find the kernel map using the first symbol */ - sym = dso__first_symbol(dso, map->type); - list_for_each_entry(new_map, &md.maps, node) { - if (sym && sym->start >= new_map->start && - sym->start < new_map->end) { - replacement_map = new_map; - break; + /* Find the kernel map using the '_stext' symbol */ + if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) { + list_for_each_entry(new_map, &md.maps, node) { + if (stext >= new_map->start && stext < new_map->end) { + replacement_map = new_map; + break; + } } } @@ -1256,6 +1248,19 @@ static int dso__load_kcore(struct dso *dso, struct map *map, map__put(new_map); } + if (machine__is(machine, "x86_64")) { + u64 addr; + + /* + * If one of the corresponding symbols is there, assume the + * entry trampoline maps are too. + */ + if (!kallsyms__get_function_start(kallsyms_filename, + ENTRY_TRAMPOLINE_NAME, + &addr)) + machine->trampolines_mapped = true; + } + /* * Set the data type and long name so that kcore can be read via * dso__data_read_addr(). @@ -1268,7 +1273,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map, close(fd); - if (map->type == MAP__FUNCTION) + if (map->prot & PROT_EXEC) pr_debug("Using %s for kernel object code\n", kcore_filename); else pr_debug("Using %s for kernel data\n", kcore_filename); @@ -1289,14 +1294,10 @@ out_err: * If the kernel is relocated at boot time, kallsyms won't match. Compute the * delta based on the relocation reference symbol. */ -static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) +static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta) { - struct kmap *kmap = map__kmap(map); u64 addr; - if (!kmap) - return -1; - if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) return 0; @@ -1310,19 +1311,23 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, bool no_kcore) { + struct kmap *kmap = map__kmap(map); u64 delta = 0; if (symbol__restricted_filename(filename, "/proc/kallsyms")) return -1; - if (dso__load_all_kallsyms(dso, filename, map) < 0) + if (!kmap || !kmap->kmaps) return -1; - if (kallsyms__delta(map, filename, &delta)) + if (dso__load_all_kallsyms(dso, filename) < 0) return -1; - symbols__fixup_end(&dso->symbols[map->type]); - symbols__fixup_duplicate(&dso->symbols[map->type]); + if (kallsyms__delta(kmap, filename, &delta)) + return -1; + + symbols__fixup_end(&dso->symbols); + symbols__fixup_duplicate(&dso->symbols); if (dso->kernel == DSO_TYPE_GUEST_KERNEL) dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; @@ -1330,9 +1335,9 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS; if (!no_kcore && !dso__load_kcore(dso, map, filename)) - return dso__split_kallsyms_for_kcore(dso, map); + return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso); else - return dso__split_kallsyms(dso, map, delta); + return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map); } int dso__load_kallsyms(struct dso *dso, const char *filename, @@ -1341,8 +1346,7 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, return __dso__load_kallsyms(dso, filename, map, false); } -static int dso__load_perf_map(const char *map_path, struct dso *dso, - struct map *map) +static int dso__load_perf_map(const char *map_path, struct dso *dso) { char *line = NULL; size_t n; @@ -1379,12 +1383,12 @@ static int dso__load_perf_map(const char *map_path, struct dso *dso, if (len + 2 >= line_len) continue; - sym = symbol__new(start, size, STB_GLOBAL, line + len); + sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len); if (sym == NULL) goto out_delete_line; - symbols__insert(&dso->symbols[map->type], sym); + symbols__insert(&dso->symbols, sym); nr_syms++; } @@ -1509,25 +1513,27 @@ int dso__load(struct dso *dso, struct map *map) pthread_mutex_lock(&dso->lock); /* check again under the dso->lock */ - if (dso__loaded(dso, map->type)) { + if (dso__loaded(dso)) { ret = 1; goto out; } + if (map->groups && map->groups->machine) + machine = map->groups->machine; + else + machine = NULL; + if (dso->kernel) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) ret = dso__load_guest_kernel_sym(dso, map); + if (machine__is(machine, "x86_64")) + machine__map_x86_64_entry_trampolines(machine, dso); goto out; } - if (map->groups && map->groups->machine) - machine = map->groups->machine; - else - machine = NULL; - dso->adjust_symbols = 0; if (perfmap) { @@ -1542,7 +1548,7 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - ret = dso__load_perf_map(map_path, dso, map); + ret = dso__load_perf_map(map_path, dso); dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : DSO_BINARY_TYPE__NOT_FOUND; goto out; @@ -1651,7 +1657,7 @@ int dso__load(struct dso *dso, struct map *map) if (ret > 0) { int nr_plt; - nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss, map); + nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss); if (nr_plt > 0) ret += nr_plt; } @@ -1663,17 +1669,16 @@ out_free: if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) ret = 0; out: - dso__set_loaded(dso, map->type); + dso__set_loaded(dso); pthread_mutex_unlock(&dso->lock); nsinfo__mountns_exit(&nsc); return ret; } -struct map *map_groups__find_by_name(struct map_groups *mg, - enum map_type type, const char *name) +struct map *map_groups__find_by_name(struct map_groups *mg, const char *name) { - struct maps *maps = &mg->maps[type]; + struct maps *maps = &mg->maps; struct map *map; down_read(&maps->lock); @@ -1720,7 +1725,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map, else dso->binary_type = DSO_BINARY_TYPE__VMLINUX; dso__set_long_name(dso, vmlinux, vmlinux_allocated); - dso__set_loaded(dso, map->type); + dso__set_loaded(dso); pr_debug("Using %s for symbols\n", symfs_vmlinux); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 70c16741f50a..d026d215bdc6 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -57,7 +57,8 @@ struct symbol { u64 start; u64 end; u16 namelen; - u8 binding; + u8 type:4; + u8 binding:4; u8 idle:1; u8 ignore:1; u8 inlined:1; @@ -89,7 +90,6 @@ struct intlist; struct symbol_conf { unsigned short priv_size; - unsigned short nr_events; bool try_vmlinux_path, init_annotation, force, @@ -108,8 +108,6 @@ struct symbol_conf { show_cpu_utilization, initialized, kptr_restrict, - annotate_asm_raw, - annotate_src, event_group, demangle, demangle_kernel, @@ -125,7 +123,8 @@ struct symbol_conf { const char *vmlinux_name, *kallsyms_name, *source_prefix, - *field_sep; + *field_sep, + *graph_function; const char *default_guest_vmlinux_name, *default_guest_kallsyms, *default_guest_modules; @@ -259,17 +258,16 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, bool no_kcore); int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); -void dso__insert_symbol(struct dso *dso, enum map_type type, +void dso__insert_symbol(struct dso *dso, struct symbol *sym); -struct symbol *dso__find_symbol(struct dso *dso, enum map_type type, - u64 addr); -struct symbol *dso__find_symbol_by_name(struct dso *dso, enum map_type type, - const char *name); +struct symbol *dso__find_symbol(struct dso *dso, u64 addr); +struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); + struct symbol *symbol__next_by_name(struct symbol *sym); -struct symbol *dso__first_symbol(struct dso *dso, enum map_type type); -struct symbol *dso__last_symbol(struct dso *dso, enum map_type type); +struct symbol *dso__first_symbol(struct dso *dso); +struct symbol *dso__last_symbol(struct dso *dso); struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); @@ -288,7 +286,7 @@ void symbol__exit(void); void symbol__elf_init(void); int symbol__annotation_init(void); -struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); +struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name); size_t __symbol__fprintf_symname_offs(const struct symbol *sym, const struct addr_location *al, bool unknown_as_addr, @@ -300,7 +298,6 @@ size_t __symbol__fprintf_symname(const struct symbol *sym, bool unknown_as_addr, FILE *fp); size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp); size_t symbol__fprintf(struct symbol *sym, FILE *fp); -bool symbol_type__is_a(char symbol_type, enum map_type map_type); bool symbol__restricted_filename(const char *filename, const char *restricted_filename); int symbol__config_symfs(const struct option *opt __maybe_unused, @@ -308,8 +305,7 @@ int symbol__config_symfs(const struct option *opt __maybe_unused, int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule); -int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, - struct map *map); +int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); @@ -317,7 +313,7 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) void symbols__insert(struct rb_root *symbols, struct symbol *sym); void symbols__fixup_duplicate(struct rb_root *symbols); void symbols__fixup_end(struct rb_root *symbols); -void __map_groups__fixup_end(struct map_groups *mg, enum map_type type); +void map_groups__fixup_end(struct map_groups *mg); typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data, @@ -384,12 +380,19 @@ int get_sdt_note_list(struct list_head *head, const char *target); int cleanup_sdt_note_list(struct list_head *sdt_notes); int sdt_notes__get_count(struct list_head *start); +#define SDT_PROBES_SCN ".probes" #define SDT_BASE_SCN ".stapsdt.base" #define SDT_NOTE_SCN ".note.stapsdt" #define SDT_NOTE_TYPE 3 #define SDT_NOTE_NAME "stapsdt" #define NR_ADDR 3 +enum { + SDT_NOTE_IDX_LOC = 0, + SDT_NOTE_IDX_BASE, + SDT_NOTE_IDX_REFCTR, +}; + struct mem_info *mem_info__new(void); struct mem_info *mem_info__get(struct mem_info *mi); void mem_info__put(struct mem_info *mi); diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index 6dd2cb88ccbe..ed0205cc7942 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c @@ -58,13 +58,13 @@ size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp) } size_t dso__fprintf_symbols_by_name(struct dso *dso, - enum map_type type, FILE *fp) + FILE *fp) { size_t ret = 0; struct rb_node *nd; struct symbol_name_rb_node *pos; - for (nd = rb_first(&dso->symbol_names[type]); nd; nd = rb_next(nd)) { + for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) { pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); fprintf(fp, "%s\n", pos->sym.name); } diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 0ee7f568d60c..3393d7ee9401 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -38,6 +38,10 @@ static const char **syscalltbl_native = syscalltbl_powerpc_64; #include <asm/syscalls_32.c> const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID; static const char **syscalltbl_native = syscalltbl_powerpc_32; +#elif defined(__aarch64__) +#include <asm/syscalls.c> +const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_arm64; #endif struct syscall { diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index dd17d6a38d3a..61a4286a74dc 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -36,6 +36,7 @@ * @branch_count: the branch count when the entry was created * @cp: call path * @no_call: a 'call' was not seen + * @trace_end: a 'call' but trace ended */ struct thread_stack_entry { u64 ret_addr; @@ -44,6 +45,7 @@ struct thread_stack_entry { u64 branch_count; struct call_path *cp; bool no_call; + bool trace_end; }; /** @@ -112,7 +114,8 @@ static struct thread_stack *thread_stack__new(struct thread *thread, return ts; } -static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) +static int thread_stack__push(struct thread_stack *ts, u64 ret_addr, + bool trace_end) { int err = 0; @@ -124,6 +127,7 @@ static int thread_stack__push(struct thread_stack *ts, u64 ret_addr) } } + ts->stack[ts->cnt].trace_end = trace_end; ts->stack[ts->cnt++].ret_addr = ret_addr; return err; @@ -150,6 +154,18 @@ static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr) } } +static void thread_stack__pop_trace_end(struct thread_stack *ts) +{ + size_t i; + + for (i = ts->cnt; i; ) { + if (ts->stack[--i].trace_end) + ts->cnt = i; + else + return; + } +} + static bool thread_stack__in_kernel(struct thread_stack *ts) { if (!ts->cnt) @@ -254,10 +270,19 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, ret_addr = from_ip + insn_len; if (ret_addr == to_ip) return 0; /* Zero-length calls are excluded */ - return thread_stack__push(thread->ts, ret_addr); - } else if (flags & PERF_IP_FLAG_RETURN) { - if (!from_ip) - return 0; + return thread_stack__push(thread->ts, ret_addr, + flags & PERF_IP_FLAG_TRACE_END); + } else if (flags & PERF_IP_FLAG_TRACE_BEGIN) { + /* + * If the caller did not change the trace number (which would + * have flushed the stack) then try to make sense of the stack. + * Possibly, tracing began after returning to the current + * address, so try to pop that. Also, do not expect a call made + * when the trace ended, to return, so pop that. + */ + thread_stack__pop(thread->ts, to_ip); + thread_stack__pop_trace_end(thread->ts); + } else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) { thread_stack__pop(thread->ts, to_ip); } @@ -285,20 +310,46 @@ void thread_stack__free(struct thread *thread) } } +static inline u64 callchain_context(u64 ip, u64 kernel_start) +{ + return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL; +} + void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip) + size_t sz, u64 ip, u64 kernel_start) { - size_t i; + u64 context = callchain_context(ip, kernel_start); + u64 last_context; + size_t i, j; - if (!thread || !thread->ts) - chain->nr = 1; - else - chain->nr = min(sz, thread->ts->cnt + 1); + if (sz < 2) { + chain->nr = 0; + return; + } + + chain->ips[0] = context; + chain->ips[1] = ip; - chain->ips[0] = ip; + if (!thread || !thread->ts) { + chain->nr = 2; + return; + } + + last_context = context; + + for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) { + ip = thread->ts->stack[thread->ts->cnt - j].ret_addr; + context = callchain_context(ip, kernel_start); + if (context != last_context) { + if (i >= sz - 1) + break; + chain->ips[i++] = context; + last_context = context; + } + chain->ips[i] = ip; + } - for (i = 1; i < chain->nr; i++) - chain->ips[i] = thread->ts->stack[thread->ts->cnt - i].ret_addr; + chain->nr = i; } struct call_return_processor * @@ -332,7 +383,7 @@ void call_return_processor__free(struct call_return_processor *crp) static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, u64 timestamp, u64 ref, struct call_path *cp, - bool no_call) + bool no_call, bool trace_end) { struct thread_stack_entry *tse; int err; @@ -350,6 +401,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->branch_count = ts->branch_count; tse->cp = cp; tse->no_call = no_call; + tse->trace_end = trace_end; return 0; } @@ -423,7 +475,7 @@ static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts, return -ENOMEM; return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp, - true); + true, false); } static int thread_stack__no_call_return(struct thread *thread, @@ -455,7 +507,7 @@ static int thread_stack__no_call_return(struct thread *thread, if (!cp) return -ENOMEM; return thread_stack__push_cp(ts, 0, sample->time, ref, - cp, true); + cp, true, false); } } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { /* Return to userspace, so pop all kernel addresses */ @@ -480,7 +532,7 @@ static int thread_stack__no_call_return(struct thread *thread, return -ENOMEM; err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, - true); + true, false); if (err) return err; @@ -500,7 +552,7 @@ static int thread_stack__trace_begin(struct thread *thread, /* Pop trace end */ tse = &ts->stack[ts->cnt - 1]; - if (tse->cp->sym == NULL && tse->cp->ip == 0) { + if (tse->trace_end) { err = thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); if (err) @@ -529,7 +581,7 @@ static int thread_stack__trace_end(struct thread_stack *ts, ret_addr = sample->ip + sample->insn_len; return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp, - false); + false, true); } int thread_stack__process(struct thread *thread, struct comm *comm, @@ -579,6 +631,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, ts->last_time = sample->time; if (sample->flags & PERF_IP_FLAG_CALL) { + bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END; struct call_path_root *cpr = ts->crp->cpr; struct call_path *cp; u64 ret_addr; @@ -596,7 +649,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, if (!cp) return -ENOMEM; err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, - cp, false); + cp, false, trace_end); } else if (sample->flags & PERF_IP_FLAG_RETURN) { if (!sample->ip || !sample->addr) return 0; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index b7e41c4ebfdd..f97c00a8c251 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -84,7 +84,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr); void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, - size_t sz, u64 ip); + size_t sz, u64 ip, u64 kernel_start); int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); size_t thread_stack__depth(struct thread *thread); diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 68b65b10579b..3d9ed7d0e281 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -302,23 +302,20 @@ int thread__insert_map(struct thread *thread, struct map *map) static int __thread__prepare_access(struct thread *thread) { bool initialized = false; - int i, err = 0; - - for (i = 0; i < MAP__NR_TYPES; ++i) { - struct maps *maps = &thread->mg->maps[i]; - struct map *map; - - down_read(&maps->lock); + int err = 0; + struct maps *maps = &thread->mg->maps; + struct map *map; - for (map = maps__first(maps); map; map = map__next(map)) { - err = unwind__prepare_access(thread, map, &initialized); - if (err || initialized) - break; - } + down_read(&maps->lock); - up_read(&maps->lock); + for (map = maps__first(maps); map; map = map__next(map)) { + err = unwind__prepare_access(thread, map, &initialized); + if (err || initialized) + break; } + up_read(&maps->lock); + return err; } @@ -333,10 +330,9 @@ static int thread__prepare_access(struct thread *thread) } static int thread__clone_map_groups(struct thread *thread, - struct thread *parent) + struct thread *parent, + bool do_maps_clone) { - int i; - /* This is new thread, we share map groups for process. */ if (thread->pid_ == parent->pid_) return thread__prepare_access(thread); @@ -346,16 +342,11 @@ static int thread__clone_map_groups(struct thread *thread, thread->pid_, thread->tid, parent->pid_, parent->tid); return 0; } - /* But this one is new process, copy maps. */ - for (i = 0; i < MAP__NR_TYPES; ++i) - if (map_groups__clone(thread, parent->mg, i) < 0) - return -ENOMEM; - - return 0; + return do_maps_clone ? map_groups__clone(thread, parent->mg) : 0; } -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone) { if (parent->comm_set) { const char *comm = thread__comm_str(parent); @@ -368,11 +359,10 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) } thread->ppid = parent->tid; - return thread__clone_map_groups(thread, parent); + return thread__clone_map_groups(thread, parent, do_maps_clone); } -void thread__find_cpumode_addr_location(struct thread *thread, - enum map_type type, u64 addr, +void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, struct addr_location *al) { size_t i; @@ -384,7 +374,7 @@ void thread__find_cpumode_addr_location(struct thread *thread, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_addr_location(thread, cpumodes[i], type, addr, al); + thread__find_symbol(thread, cpumodes[i], addr, al); if (al->map) break; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 14d44c3235b8..30e2b4c165fe 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -42,6 +42,8 @@ struct thread { void *addr_space; struct unwind_libunwind_ops *unwind_libunwind_ops; #endif + bool filter; + int filter_entry_depth; }; struct machine; @@ -87,21 +89,18 @@ struct comm *thread__comm(const struct thread *thread); struct comm *thread__exec_comm(const struct thread *thread); const char *thread__comm_str(const struct thread *thread); int thread__insert_map(struct thread *thread, struct map *map); -int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp); +int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); size_t thread__fprintf(struct thread *thread, FILE *fp); struct thread *thread__main_thread(struct machine *machine, struct thread *thread); -void thread__find_addr_map(struct thread *thread, - u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al); +struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr, + struct addr_location *al); -void thread__find_addr_location(struct thread *thread, - u8 cpumode, enum map_type type, u64 addr, - struct addr_location *al); +struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode, + u64 addr, struct addr_location *al); -void thread__find_cpumode_addr_location(struct thread *thread, - enum map_type type, u64 addr, +void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, struct addr_location *al); static inline void *thread__priv(struct thread *thread) diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 183c91453522..56e4ca54020a 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -26,15 +26,12 @@ typedef int (*event_attr_op)(struct perf_tool *tool, union perf_event *event, struct perf_evlist **pevlist); -typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); +typedef int (*event_op2)(struct perf_session *session, union perf_event *event); +typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); -typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); - enum show_feature_header { SHOW_FEAT_NO_HEADER = 0, SHOW_FEAT_HEADER, diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h index 9892323cdd7c..9add1f72ce95 100644 --- a/tools/perf/util/top.h +++ b/tools/perf/util/top.h @@ -3,6 +3,7 @@ #define __PERF_TOP_H 1 #include "tool.h" +#include "annotate.h" #include <linux/types.h> #include <stddef.h> #include <stdbool.h> @@ -16,6 +17,7 @@ struct perf_top { struct perf_tool tool; struct perf_evlist *evlist; struct record_opts record_opts; + struct annotation_options annotation_opts; /* * Symbols will be added here in perf_event__process_sample and will * get out after decayed. @@ -35,7 +37,6 @@ struct perf_top { struct perf_session *session; struct winsize winsize; int realtime_prio; - int sym_pcnt_filter; const char *sym_filter; float min_percent; unsigned int nr_threads_synthesize; diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index d7f2113462fb..8ad8e755127b 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c @@ -103,11 +103,10 @@ out: static int record_header_files(void) { - char *path; + char *path = get_events_file("header_page"); struct stat st; int err = -EIO; - path = get_tracing_file("events/header_page"); if (!path) { pr_debug("can't get tracing/events/header_page"); return -ENOMEM; @@ -128,9 +127,9 @@ static int record_header_files(void) goto out; } - put_tracing_file(path); + put_events_file(path); - path = get_tracing_file("events/header_event"); + path = get_events_file("header_event"); if (!path) { pr_debug("can't get tracing/events/header_event"); err = -ENOMEM; @@ -154,7 +153,7 @@ static int record_header_files(void) err = 0; out: - put_tracing_file(path); + put_events_file(path); return err; } @@ -243,7 +242,7 @@ static int record_ftrace_files(struct tracepoint_path *tps) char *path; int ret; - path = get_tracing_file("events/ftrace"); + path = get_events_file("ftrace"); if (!path) { pr_debug("can't get tracing/events/ftrace"); return -ENOMEM; @@ -378,7 +377,7 @@ out: static int record_saved_cmdline(void) { - unsigned int size; + unsigned long long size; char *path; struct stat st; int ret, err = 0; @@ -532,12 +531,14 @@ struct tracing_data *tracing_data_get(struct list_head *pattrs, "/tmp/perf-XXXXXX"); if (!mkstemp(tdata->temp_file)) { pr_debug("Can't make temp file"); + free(tdata); return NULL; } temp_fd = open(tdata->temp_file, O_RDWR); if (temp_fd < 0) { pr_debug("Can't read '%s'", tdata->temp_file); + free(tdata); return NULL; } diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index e0a6e9a6a053..32e558a65af3 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -32,23 +32,24 @@ static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) { - struct pevent *pevent = context->pevent; - struct event_format *event; - struct format_field *field; + struct tep_handle *pevent = context->pevent; + struct tep_event_format *event; + struct tep_format_field *field; if (!*size) { - if (!pevent->events) + + event = tep_get_first_event(pevent); + if (!event) return 0; - event = pevent->events[0]; - field = pevent_find_common_field(event, type); + field = tep_find_common_field(event, type); if (!field) return 0; *offset = field->offset; *size = field->size; } - return pevent_read_number(pevent, context->event_data + *offset, *size); + return tep_read_number(pevent, context->event_data + *offset, *size); } int common_lock_depth(struct scripting_context *context) @@ -94,29 +95,29 @@ int common_pc(struct scripting_context *context) } unsigned long long -raw_field_value(struct event_format *event, const char *name, void *data) +raw_field_value(struct tep_event_format *event, const char *name, void *data) { - struct format_field *field; + struct tep_format_field *field; unsigned long long val; - field = pevent_find_any_field(event, name); + field = tep_find_any_field(event, name); if (!field) return 0ULL; - pevent_read_number_field(field, data, &val); + tep_read_number_field(field, data, &val); return val; } -unsigned long long read_size(struct event_format *event, void *ptr, int size) +unsigned long long read_size(struct tep_event_format *event, void *ptr, int size) { - return pevent_read_number(event->pevent, ptr, size); + return tep_read_number(event->pevent, ptr, size); } -void event_format__fprintf(struct event_format *event, +void event_format__fprintf(struct tep_event_format *event, int cpu, void *data, int size, FILE *fp) { - struct pevent_record record; + struct tep_record record; struct trace_seq s; memset(&record, 0, sizeof(record)); @@ -125,18 +126,18 @@ void event_format__fprintf(struct event_format *event, record.data = data; trace_seq_init(&s); - pevent_event_info(&s, event, &record); + tep_event_info(&s, event, &record); trace_seq_do_fprintf(&s, fp); trace_seq_destroy(&s); } -void event_format__print(struct event_format *event, +void event_format__print(struct tep_event_format *event, int cpu, void *data, int size) { return event_format__fprintf(event, cpu, data, size, stdout); } -void parse_ftrace_printk(struct pevent *pevent, +void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size __maybe_unused) { unsigned long long addr; @@ -157,61 +158,65 @@ void parse_ftrace_printk(struct pevent *pevent, /* fmt still has a space, skip it */ printk = strdup(fmt+1); line = strtok_r(NULL, "\n", &next); - pevent_register_print_string(pevent, printk, addr); + tep_register_print_string(pevent, printk, addr); + free(printk); } } -void parse_saved_cmdline(struct pevent *pevent, +void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size __maybe_unused) { - char *comm; + char comm[17]; /* Max comm length in the kernel is 16. */ char *line; char *next = NULL; int pid; line = strtok_r(file, "\n", &next); while (line) { - sscanf(line, "%d %ms", &pid, &comm); - pevent_register_comm(pevent, comm, pid); - free(comm); + if (sscanf(line, "%d %16s", &pid, comm) == 2) + tep_register_comm(pevent, comm, pid); line = strtok_r(NULL, "\n", &next); } } -int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size) +int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size) { - return pevent_parse_event(pevent, buf, size, "ftrace"); + return tep_parse_event(pevent, buf, size, "ftrace"); } -int parse_event_file(struct pevent *pevent, +int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys) { - return pevent_parse_event(pevent, buf, size, sys); + return tep_parse_event(pevent, buf, size, sys); } -struct event_format *trace_find_next_event(struct pevent *pevent, - struct event_format *event) +struct tep_event_format *trace_find_next_event(struct tep_handle *pevent, + struct tep_event_format *event) { static int idx; + int events_count; + struct tep_event_format *all_events; - if (!pevent || !pevent->events) + all_events = tep_get_first_event(pevent); + events_count = tep_get_events_count(pevent); + if (!pevent || !all_events || events_count < 1) return NULL; if (!event) { idx = 0; - return pevent->events[0]; + return all_events; } - if (idx < pevent->nr_events && event == pevent->events[idx]) { + if (idx < events_count && event == (all_events + idx)) { idx++; - if (idx == pevent->nr_events) + if (idx == events_count) return NULL; - return pevent->events[idx]; + return (all_events + idx); } - for (idx = 1; idx < pevent->nr_events; idx++) { - if (event == pevent->events[idx - 1]) - return pevent->events[idx]; + for (idx = 1; idx < events_count; idx++) { + if (event == (all_events + (idx - 1))) + return (all_events + idx); } return NULL; } diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 40b425949aa3..76f12c705ef9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -96,22 +96,22 @@ static void skip(int size) }; } -static unsigned int read4(struct pevent *pevent) +static unsigned int read4(struct tep_handle *pevent) { unsigned int data; if (do_read(&data, 4) < 0) return 0; - return __data2host4(pevent, data); + return __tep_data2host4(pevent, data); } -static unsigned long long read8(struct pevent *pevent) +static unsigned long long read8(struct tep_handle *pevent) { unsigned long long data; if (do_read(&data, 8) < 0) return 0; - return __data2host8(pevent, data); + return __tep_data2host8(pevent, data); } static char *read_string(void) @@ -158,7 +158,7 @@ out: return str; } -static int read_proc_kallsyms(struct pevent *pevent) +static int read_proc_kallsyms(struct tep_handle *pevent) { unsigned int size; @@ -181,7 +181,7 @@ static int read_proc_kallsyms(struct pevent *pevent) return 0; } -static int read_ftrace_printk(struct pevent *pevent) +static int read_ftrace_printk(struct tep_handle *pevent) { unsigned int size; char *buf; @@ -208,7 +208,7 @@ static int read_ftrace_printk(struct pevent *pevent) return 0; } -static int read_header_files(struct pevent *pevent) +static int read_header_files(struct tep_handle *pevent) { unsigned long long size; char *header_page; @@ -235,13 +235,13 @@ static int read_header_files(struct pevent *pevent) return -1; } - if (!pevent_parse_header_page(pevent, header_page, size, - pevent_get_long_size(pevent))) { + if (!tep_parse_header_page(pevent, header_page, size, + tep_get_long_size(pevent))) { /* * The commit field in the page is of type long, * use that instead, since it represents the kernel. */ - pevent_set_long_size(pevent, pevent->header_page_size_size); + tep_set_long_size(pevent, tep_get_header_page_size(pevent)); } free(header_page); @@ -259,7 +259,7 @@ static int read_header_files(struct pevent *pevent) return ret; } -static int read_ftrace_file(struct pevent *pevent, unsigned long long size) +static int read_ftrace_file(struct tep_handle *pevent, unsigned long long size) { int ret; char *buf; @@ -284,8 +284,8 @@ out: return ret; } -static int read_event_file(struct pevent *pevent, char *sys, - unsigned long long size) +static int read_event_file(struct tep_handle *pevent, char *sys, + unsigned long long size) { int ret; char *buf; @@ -297,10 +297,8 @@ static int read_event_file(struct pevent *pevent, char *sys, } ret = do_read(buf, size); - if (ret < 0) { - free(buf); + if (ret < 0) goto out; - } ret = parse_event_file(pevent, buf, size, sys); if (ret < 0) @@ -310,7 +308,7 @@ out: return ret; } -static int read_ftrace_files(struct pevent *pevent) +static int read_ftrace_files(struct tep_handle *pevent) { unsigned long long size; int count; @@ -328,7 +326,7 @@ static int read_ftrace_files(struct pevent *pevent) return 0; } -static int read_event_files(struct pevent *pevent) +static int read_event_files(struct tep_handle *pevent) { unsigned long long size; char *sys; @@ -349,14 +347,17 @@ static int read_event_files(struct pevent *pevent) for (x=0; x < count; x++) { size = read8(pevent); ret = read_event_file(pevent, sys, size); - if (ret) + if (ret) { + free(sys); return ret; + } } + free(sys); } return 0; } -static int read_saved_cmdline(struct pevent *pevent) +static int read_saved_cmdline(struct tep_handle *pevent) { unsigned long long size; char *buf; @@ -399,7 +400,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) int host_bigendian; int file_long_size; int file_page_size; - struct pevent *pevent = NULL; + struct tep_handle *pevent = NULL; int err; repipe = __repipe; @@ -439,9 +440,9 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) pevent = tevent->pevent; - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, file_bigendian); - pevent_set_host_bigendian(pevent, host_bigendian); + tep_set_flag(pevent, TEP_NSEC_OUTPUT); + tep_set_file_bigendian(pevent, file_bigendian); + tep_set_host_bigendian(pevent, host_bigendian); if (do_read(buf, 1) < 0) goto out; @@ -451,8 +452,8 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) if (!file_page_size) goto out; - pevent_set_long_size(pevent, file_long_size); - pevent_set_page_size(pevent, file_page_size); + tep_set_long_size(pevent, file_long_size); + tep_set_page_size(pevent, file_page_size); err = read_header_files(pevent); if (err) @@ -479,9 +480,9 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe) repipe = false; if (show_funcs) { - pevent_print_funcs(pevent); + tep_print_funcs(pevent); } else if (show_printk) { - pevent_print_printk(pevent); + tep_print_printk(pevent); } pevent = NULL; diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index b1e5c3a2b8e3..b749f812ac70 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -66,7 +66,7 @@ static int python_start_script_unsupported(const char *script __maybe_unused, return -1; } -static int python_generate_script_unsupported(struct pevent *pevent +static int python_generate_script_unsupported(struct tep_handle *pevent __maybe_unused, const char *outfile __maybe_unused) @@ -130,7 +130,7 @@ static int perl_start_script_unsupported(const char *script __maybe_unused, return -1; } -static int perl_generate_script_unsupported(struct pevent *pevent +static int perl_generate_script_unsupported(struct tep_handle *pevent __maybe_unused, const char *outfile __maybe_unused) { diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 16a776371d03..95664b2f771e 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -28,10 +28,10 @@ static bool tevent_initialized; int trace_event__init(struct trace_event *t) { - struct pevent *pevent = pevent_alloc(); + struct tep_handle *pevent = tep_alloc(); if (pevent) { - t->plugin_list = traceevent_load_plugins(pevent); + t->plugin_list = tep_load_plugins(pevent); t->pevent = pevent; } @@ -40,56 +40,60 @@ int trace_event__init(struct trace_event *t) static int trace_event__init2(void) { - int be = traceevent_host_bigendian(); - struct pevent *pevent; + int be = tep_host_bigendian(); + struct tep_handle *pevent; if (trace_event__init(&tevent)) return -1; pevent = tevent.pevent; - pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); - pevent_set_file_bigendian(pevent, be); - pevent_set_host_bigendian(pevent, be); + tep_set_flag(pevent, TEP_NSEC_OUTPUT); + tep_set_file_bigendian(pevent, be); + tep_set_host_bigendian(pevent, be); tevent_initialized = true; return 0; } int trace_event__register_resolver(struct machine *machine, - pevent_func_resolver_t *func) + tep_func_resolver_t *func) { if (!tevent_initialized && trace_event__init2()) return -1; - return pevent_set_function_resolver(tevent.pevent, func, machine); + return tep_set_function_resolver(tevent.pevent, func, machine); } void trace_event__cleanup(struct trace_event *t) { - traceevent_unload_plugins(t->plugin_list, t->pevent); - pevent_free(t->pevent); + tep_unload_plugins(t->plugin_list, t->pevent); + tep_free(t->pevent); } /* * Returns pointer with encoded error via <linux/err.h> interface. */ -static struct event_format* +static struct tep_event_format* tp_format(const char *sys, const char *name) { - struct pevent *pevent = tevent.pevent; - struct event_format *event = NULL; + char *tp_dir = get_events_file(sys); + struct tep_handle *pevent = tevent.pevent; + struct tep_event_format *event = NULL; char path[PATH_MAX]; size_t size; char *data; int err; - scnprintf(path, PATH_MAX, "%s/%s/%s/format", - tracing_events_path, sys, name); + if (!tp_dir) + return ERR_PTR(-errno); + + scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name); + put_events_file(tp_dir); err = filename__read_str(path, &data, &size); if (err) return ERR_PTR(err); - pevent_parse_format(pevent, &event, data, size, sys); + tep_parse_format(pevent, &event, data, size, sys); free(data); return event; @@ -98,7 +102,7 @@ tp_format(const char *sys, const char *name) /* * Returns pointer with encoded error via <linux/err.h> interface. */ -struct event_format* +struct tep_event_format* trace_event__tp_format(const char *sys, const char *name) { if (!tevent_initialized && trace_event__init2()) @@ -107,10 +111,10 @@ trace_event__tp_format(const char *sys, const char *name) return tp_format(sys, name); } -struct event_format *trace_event__tp_format_id(int id) +struct tep_event_format *trace_event__tp_format_id(int id) { if (!tevent_initialized && trace_event__init2()) return ERR_PTR(-ENOMEM); - return pevent_find_event(tevent.pevent, id); + return tep_find_event(tevent.pevent, id); } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index dcbdb53dc702..f024d73bfc40 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -3,6 +3,7 @@ #define _PERF_UTIL_TRACE_EVENT_H #include <traceevent/event-parse.h> +#include <traceevent/trace-seq.h> #include "parse-events.h" struct machine; @@ -10,46 +11,46 @@ struct perf_sample; union perf_event; struct perf_tool; struct thread; -struct plugin_list; +struct tep_plugin_list; struct trace_event { - struct pevent *pevent; - struct plugin_list *plugin_list; + struct tep_handle *pevent; + struct tep_plugin_list *plugin_list; }; int trace_event__init(struct trace_event *t); void trace_event__cleanup(struct trace_event *t); int trace_event__register_resolver(struct machine *machine, - pevent_func_resolver_t *func); -struct event_format* + tep_func_resolver_t *func); +struct tep_event_format* trace_event__tp_format(const char *sys, const char *name); -struct event_format *trace_event__tp_format_id(int id); +struct tep_event_format *trace_event__tp_format_id(int id); int bigendian(void); -void event_format__fprintf(struct event_format *event, +void event_format__fprintf(struct tep_event_format *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct event_format *event, +void event_format__print(struct tep_event_format *event, int cpu, void *data, int size); -int parse_ftrace_file(struct pevent *pevent, char *buf, unsigned long size); -int parse_event_file(struct pevent *pevent, +int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); +int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); unsigned long long -raw_field_value(struct event_format *event, const char *name, void *data); +raw_field_value(struct tep_event_format *event, const char *name, void *data); -void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size); -void parse_ftrace_printk(struct pevent *pevent, char *file, unsigned int size); -void parse_saved_cmdline(struct pevent *pevent, char *file, unsigned int size); +void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size); +void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size); +void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size); ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe); -struct event_format *trace_find_next_event(struct pevent *pevent, - struct event_format *event); -unsigned long long read_size(struct event_format *event, void *ptr, int size); +struct tep_event_format *trace_find_next_event(struct tep_handle *pevent, + struct tep_event_format *event); +unsigned long long read_size(struct tep_event_format *event, void *ptr, int size); unsigned long long eval_flag(const char *flag); int read_tracing_data(int fd, struct list_head *pattrs); @@ -83,7 +84,7 @@ struct scripting_ops { void (*process_stat)(struct perf_stat_config *config, struct perf_evsel *evsel, u64 tstamp); void (*process_stat_interval)(u64 tstamp); - int (*generate_script) (struct pevent *pevent, const char *outfile); + int (*generate_script) (struct tep_handle *pevent, const char *outfile); }; extern unsigned int scripting_max_stack; @@ -94,7 +95,7 @@ void setup_perl_scripting(void); void setup_python_scripting(void); struct scripting_context { - struct pevent *pevent; + struct tep_handle *pevent; void *event_data; }; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 7bdd239c795c..5eff9bfc5758 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -28,10 +28,11 @@ static int __report_module(struct addr_location *al, u64 ip, { Dwfl_Module *mod; struct dso *dso = NULL; - - thread__find_addr_location(ui->thread, - PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, al); + /* + * Some callers will use al->sym, so we can't just use the + * cheaper thread__find_map() here. + */ + thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al); if (al->map) dso = al->map->dso; @@ -44,13 +45,13 @@ static int __report_module(struct addr_location *al, u64 ip, Dwarf_Addr s; dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); - if (s != al->map->start) + if (s != al->map->start - al->map->pgoff) mod = 0; } if (!mod) mod = dwfl_report_elf(ui->dwfl, dso->short_name, - (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start, + (dso->symsrc_filename ? dso->symsrc_filename : dso->long_name), -1, al->map->start - al->map->pgoff, false); return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; @@ -76,7 +77,7 @@ static int entry(u64 ip, struct unwind_info *ui) if (__report_module(&al, ip, ui)) return -1; - e->ip = al.addr; + e->ip = ip; e->map = al.map; e->sym = al.sym; @@ -103,19 +104,7 @@ static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr, struct addr_location al; ssize_t size; - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, addr, &al); - if (!al.map) { - /* - * We've seen cases (softice) where DWARF unwinder went - * through non executable mmaps, which we need to lookup - * in MAP__VARIABLE tree. - */ - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__VARIABLE, addr, &al); - } - - if (!al.map) { + if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) { pr_debug("unwind: no map for %lx\n", (unsigned long)addr); return -1; } diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index af873044d33a..79f521a552cf 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -366,19 +366,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, static struct map *find_map(unw_word_t ip, struct unwind_info *ui) { struct addr_location al; - - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - if (!al.map) { - /* - * We've seen cases (softice) where DWARF unwinder went - * through non executable mmaps, which we need to lookup - * in MAP__VARIABLE tree. - */ - thread__find_addr_map(ui->thread, PERF_RECORD_MISC_USER, - MAP__VARIABLE, ip, &al); - } - return al.map; + return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al); } static int @@ -586,12 +574,9 @@ static int entry(u64 ip, struct thread *thread, struct unwind_entry e; struct addr_location al; - thread__find_addr_location(thread, PERF_RECORD_MISC_USER, - MAP__FUNCTION, ip, &al); - - e.ip = al.addr; + e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al); + e.ip = ip; e.map = al.map; - e.sym = al.sym; pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n", al.sym ? al.sym->name : "''", diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 1019bbc5dbd8..093352e93d50 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -38,11 +38,43 @@ void perf_set_multithreaded(void) } unsigned int page_size; -int cacheline_size; + +#ifdef _SC_LEVEL1_DCACHE_LINESIZE +#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) +#else +static void cache_line_size(int *cacheline_sizep) +{ + if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep)) + pr_debug("cannot determine cache line size"); +} +#endif + +int cacheline_size(void) +{ + static int size; + + if (!size) + cache_line_size(&size); + + return size; +} int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK; +int sysctl__max_stack(void) +{ + int value; + + if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) + sysctl_perf_event_max_stack = value; + + if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0) + sysctl_perf_event_max_contexts_per_stack = value; + + return sysctl_perf_event_max_stack; +} + bool test_attr__enabled; bool perf_host = true; @@ -189,7 +221,7 @@ out: return err; } -static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) { void *ptr; loff_t pgoff; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index c9626c206208..ece040b799f6 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -6,6 +6,7 @@ /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ #define _DEFAULT_SOURCE 1 +#include <fcntl.h> #include <stdbool.h> #include <stddef.h> #include <stdlib.h> @@ -35,6 +36,7 @@ bool lsdir_no_dot_filter(const char *name, struct dirent *d); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi); +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size); ssize_t readn(int fd, void *buf, size_t n); ssize_t writen(int fd, const void *buf, size_t n); @@ -43,7 +45,9 @@ size_t hex_width(u64 v); int hex2u64(const char *ptr, u64 *val); extern unsigned int page_size; -extern int cacheline_size; +int __pure cacheline_size(void); + +int sysctl__max_stack(void); int fetch_kernel_version(unsigned int *puint, char *str, size_t str_sz); @@ -55,6 +59,10 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); +#ifndef HAVE_GET_CURRENT_DIR_NAME +char *get_current_dir_name(void); +#endif + #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); #endif diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 0acb1ec0e2f0..741af209b19d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -139,12 +139,10 @@ static enum dso_type machine__thread_dso_type(struct machine *machine, struct thread *thread) { enum dso_type dso_type = DSO__TYPE_UNKNOWN; - struct map *map; - struct dso *dso; + struct map *map = map_groups__first(thread->mg); - map = map_groups__first(thread->mg, MAP__FUNCTION); for (; map ; map = map_groups__next(map)) { - dso = map->dso; + struct dso *dso = map->dso; if (!dso || dso->long_name[0] != '/') continue; dso_type = dso__type(dso, machine); diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index a725b958cf31..902ce6384f57 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c @@ -5,6 +5,8 @@ #include <sys/stat.h> #include <sys/mman.h> #include <zlib.h> +#include <linux/compiler.h> +#include <unistd.h> #include "util/compress.h" #include "util/util.h" @@ -79,3 +81,19 @@ out_close: return ret == Z_STREAM_END ? 0 : -1; } + +bool gzip_is_compressed(const char *input) +{ + int fd = open(input, O_RDONLY); + const uint8_t magic[2] = { 0x1f, 0x8b }; + char buf[2] = { 0 }; + ssize_t rc; + + if (fd < 0) + return -1; + + rc = read(fd, buf, sizeof(buf)); + close(fd); + return rc == sizeof(buf) ? + memcmp(buf, magic, sizeof(buf)) == 0 : false; +} |