From 726647d0526c5c2f3472010677122b89d9e4ef88 Mon Sep 17 00:00:00 2001 From: Jack Henschel Date: Thu, 24 Aug 2017 15:20:22 +0200 Subject: perf stat: Fix path to PMU formats in documentation As defined in tools/perf/util/pmu.c, the EVENT_SOURCE_DEVICE_PATH is /sys/bus/event_source/devices/ (no trailing 's' in event_source). This patch corrects the path in the perf stat documentation. Signed-off-by: Jack Henschel Cc: Alexander Shishkin Cc: Jack Henschel Cc: Peter Zijlstra Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/20170824132022.10934-1-jackdev@mailbox.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 698076313606..c37d61682dfb 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -41,13 +41,13 @@ report:: - a symbolically formed event like 'pmu/param1=0x3,param2/' where param1 and param2 are defined as formats for the PMU in - /sys/bus/event_sources/devices/<pmu>/format/* + /sys/bus/event_source/devices/<pmu>/format/* - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' where M, N, K are numbers (in decimal, hex, octal format). Acceptable values for each of 'config', 'config1' and 'config2' parameters are defined by corresponding entries in - /sys/bus/event_sources/devices/<pmu>/format/* + /sys/bus/event_source/devices/<pmu>/format/* -i:: --no-inherit:: -- cgit v1.2.1 From 6bd76b8fabe157233e498931c3f9298ee7128a28 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 25 Aug 2017 15:45:10 -0300 Subject: perf tools: Fix static build with newer toolchains We can't pass --dynamic-list into a static build anymore, because compilers have started to scream about that. Fedora 26 started to fail the build with the following error: $ make LDFLAGS=-static ... 
/usr/bin/ld: dynamic STT_GNU_IFUNC symbol `strcmp' with pointer equality in `/usr/lib/gcc/x86_64-redhat-linux/7/../../../../lib64/libc.a(strcmp.o +)' can not be used when making an executable; recompile with -fPIE and relink with -pie There's no sense for --dynamic-list in static build, because there's no .dynsym table in static binary. Consequently the traceevent plugins have never worked with static build, but it was quietly passed by. To fix this in future I think we should add support to compile plugins within the perf binary directly for static build. Reported-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Link: http://lkml.kernel.org/n/tip-jeg6a7ff9j9hlqn8k4gllzvv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 70ddc65f898d..a700a079a218 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -279,7 +279,13 @@ LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list -LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) + +# +# The static build has no dynsym table, so this does not work for +# static build. Looks like linker starts to scream about that now +# (in Fedora 26) so we need to switch it off for static build. +DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) +LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) LIBAPI = $(API_PATH)libapi.a export LIBAPI -- cgit v1.2.1 From 12c15302dd4b768105d4b7a487ed4858ccab94fc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 10:57:32 +0200 Subject: perf c2c: Fix remote HITM detection for Skylake Skylake introduced new mem_remote bit in union perf_mem_data_src [1]. 
It applies to any other memory level to express Remote unknown level, as is reported by Skylake. Adding this extra check to c2c_decode_stats to properly decode remote HITMs on Skylake. [1] http://lkml.kernel.org/r/20170816222156.19953-4-andi@firstfloor.org Signed-off-by: Jiri Olsa Acked-by: Andi Kleen Cc: Alexander Shishkin Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824085732.28481-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index ced4f3fff035..28afe5fa84d6 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -316,6 +316,11 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) u64 lvl = data_src->mem_lvl; u64 snoop = data_src->mem_snoop; u64 lock = data_src->mem_lock; + /* + * Skylake might report unknown remote level via this + * bit, consider it when evaluating remote HITMs. + */ + bool mrem = data_src->mem_remote; int err = 0; #define HITM_INC(__f) \ @@ -361,7 +366,8 @@ do { \ } if ((lvl & P(LVL, REM_RAM1)) || - (lvl & P(LVL, REM_RAM2))) { + (lvl & P(LVL, REM_RAM2)) || + mrem) { stats->rmt_dram++; if (snoop & P(SNOOP, HIT)) stats->ld_shared++; @@ -371,7 +377,8 @@ do { \ } if ((lvl & P(LVL, REM_CCE1)) || - (lvl & P(LVL, REM_CCE2))) { + (lvl & P(LVL, REM_CCE2)) || + mrem) { if (snoop & P(SNOOP, HIT)) stats->rmt_hit++; else if (snoop & P(SNOOP, HITM)) -- cgit v1.2.1 From a17f06978769735ab5c7598c46881fa201e9b1a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:31 +0200 Subject: perf record: Set read_format for inherit_stat Set read_format for what we expect to get from read event generated by perf_event_attr::inherit_stat. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a5888c704e01..d9bd632ed7db 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -902,8 +902,13 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, if (opts->no_samples) attr->sample_freq = 0; - if (opts->inherit_stat) + if (opts->inherit_stat) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING | + PERF_FORMAT_ID; attr->inherit_stat = 1; + } if (opts->sample_address) { perf_evsel__set_sample_bit(evsel, ADDR); -- cgit v1.2.1 From dac7f6b7ed1c8601358357f60e9764a4c6a68d71 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:32 +0200 Subject: perf report: Add dump_read function Adding dump_read function to gather all the dump output of read function. Adding output of enabled and running times and id if enabled (3 new lines with '...' prefix below). $ perf record -s ... $ perf report -D 958358311769 0x91f8 [0x40]: PERF_RECORD_READ: 3339 3339 cycles:u 0 ... time enabled : 958358313731 ... time running : 958358313731 ... 
id : 80 Committer note: Do not use 'read' as a variable name as it breaks the build on older systems, such as RHEL6: CC /tmp/build/perf/util/session.o cc1: warnings being treated as errors util/session.c: In function 'dump_read': util/session.c:1132: error: declaration of 'read' shadows a global declaration /usr/include/bits/unistd.h:35: error: shadowed declaration is here mv: cannot stat `/tmp/build/perf/util/.session.o.tmp': No such file or directory Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ---- tools/perf/util/session.c | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bace3429c030..9e4004b08f55 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -249,10 +249,6 @@ static int process_read_event(struct perf_tool *tool, return err; } - dump_printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, - evsel ? perf_evsel__name(evsel) : "FAIL", - event->read.value); - return 0; } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dc453f84a14c..ac863691605f 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1127,6 +1127,30 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } +static void dump_read(struct perf_evsel *evsel, union perf_event *event) +{ + struct read_event *read_event = &event->read; + u64 read_format; + + if (!dump_trace) + return; + + printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid, + evsel ? 
perf_evsel__name(evsel) : "FAIL", + event->read.value); + + read_format = evsel->attr.read_format; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + printf("... time enabled : %" PRIu64 "\n", read_event->time_enabled); + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + printf("... time running : %" PRIu64 "\n", read_event->time_running); + + if (read_format & PERF_FORMAT_ID) + printf("... id : %" PRIu64 "\n", read_event->id); +} + static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) @@ -1271,6 +1295,7 @@ static int machines__deliver_event(struct machines *machines, evlist->stats.total_lost_samples += event->lost_samples.lost; return tool->lost_samples(tool, event, sample, machine); case PERF_RECORD_READ: + dump_read(evsel, event); return tool->read(tool, event, sample, evsel, machine); case PERF_RECORD_THROTTLE: return tool->throttle(tool, event, sample, machine); -- cgit v1.2.1 From 64eed1deb6d87f4c0efe03297f50367a3689eb56 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:33 +0200 Subject: perf values: Fix thread index bug We are taking wrong index (+1) for first thread, which leaves thread with index 0 unused and uninitialized. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 5de2e15e2eda..9ac36bf2c438 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -98,7 +98,7 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, return i; } - i = values->threads + 1; + i = values->threads; values->value[i] = malloc(values->counters_max * sizeof(**values->value)); if (!values->value[i]) { pr_debug("failed to allocate read_values counters array"); @@ -106,7 +106,7 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, } values->pid[i] = pid; values->tid[i] = tid; - values->threads = i; + values->threads = i + 1; return i; } -- cgit v1.2.1 From f4ef3b7c184c4c269f953f226f7158347d007622 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:34 +0200 Subject: perf values: Fix allocation check Bailing out in case the allocation failed, not the other way round. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 9ac36bf2c438..2c4af02f08cd 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -131,7 +131,7 @@ static int perf_read_values__enlarge_counters(struct perf_read_values *values) for (i = 0; i < values->threads; i++) { u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); - if (value) { + if (!value) { pr_debug("failed to enlarge read_values ->values array"); goto out_free_name; } -- cgit v1.2.1 From a1834fc938344dd3015a1df64ee7f2af70ded147 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:35 +0200 Subject: perf values: Zero value buffers We need to make sure the array of value pointers is zero initialized, because we use them in realloc later on and an uninitialized non-zero value will cause an allocation error and aborted execution. 
Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/values.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 2c4af02f08cd..3b56aeaa8cbb 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -12,7 +12,7 @@ int perf_read_values_init(struct perf_read_values *values) values->threads_max = 16; values->pid = malloc(values->threads_max * sizeof(*values->pid)); values->tid = malloc(values->threads_max * sizeof(*values->tid)); - values->value = malloc(values->threads_max * sizeof(*values->value)); + values->value = zalloc(values->threads_max * sizeof(*values->value)); if (!values->pid || !values->tid || !values->value) { pr_debug("failed to allocate read_values threads arrays"); goto out_free_pid; @@ -99,7 +99,8 @@ static int perf_read_values__findnew_thread(struct perf_read_values *values, } i = values->threads; - values->value[i] = malloc(values->counters_max * sizeof(**values->value)); + + values->value[i] = zalloc(values->counters_max * sizeof(**values->value)); if (!values->value[i]) { pr_debug("failed to allocate read_values counters array"); return -ENOMEM; @@ -130,12 +131,16 @@ static int perf_read_values__enlarge_counters(struct perf_read_values *values) for (i = 0; i < values->threads; i++) { u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value)); + int j; if (!value) { pr_debug("failed to enlarge read_values ->values array"); goto out_free_name; } + for (j = values->counters_max; j < counters_max; j++) + value[j] = 0; + values->value[i] = value; } -- cgit v1.2.1 From 9933183e365f7dd3a79507f1ffb4bcf9433a73ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Aug 2017 18:27:36 +0200 Subject: perf report: 
Group stat values on global event id There's no big value on displaying counts for every event ID, which is one per every CPU. Rather than that, displaying the whole sum for the event. $ perf record -c 100000 -e cycles:u -s test $ perf report -T Before: # PID TID cycles:u cycles:u cycles:u cycles:u ... [20 more columns of 'cycles:u'] 3339 3339 0 0 0 0 3340 3340 0 0 0 0 3341 3341 0 0 0 0 3342 3342 0 0 0 0 Now: # PID TID cycles:u 3339 3339 19678 3340 3340 18744 3341 3341 17335 3342 3342 26414 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170824162737.7813-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 +- tools/perf/util/values.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9e4004b08f55..f9dff652dcbd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -241,7 +241,7 @@ static int process_read_event(struct perf_tool *tool, const char *name = evsel ? 
perf_evsel__name(evsel) : "unknown"; int err = perf_read_values_add_value(&rep->show_threads_values, event->read.pid, event->read.tid, - event->read.id, + evsel->idx, name, event->read.value); diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c index 3b56aeaa8cbb..8a32bb0095e5 100644 --- a/tools/perf/util/values.c +++ b/tools/perf/util/values.c @@ -192,7 +192,7 @@ int perf_read_values_add_value(struct perf_read_values *values, if (cindex < 0) return cindex; - values->value[tindex][cindex] = value; + values->value[tindex][cindex] += value; return 0; } -- cgit v1.2.1 From 39a59f1e3ea541035637432db39158a461f29146 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:38 -0700 Subject: perf tools: Allow external definition of flex and bison binary names Allow user to define flex and bison binary names by passing FLEX and BISON variables. Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-3-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a700a079a218..58924eb0f40b 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -164,8 +164,8 @@ LN = ln -f MKDIR = mkdir FIND = find INSTALL = install -FLEX = flex -BISON = bison +FLEX ?= flex +BISON ?= bison STRIP = strip AWK = awk -- cgit v1.2.1 From 3866058ef15b6ae6f4ff48e088428b46bcc43fa1 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:40 -0700 Subject: perf tools: Robustify detection of clang binary Prior to this patch, make scripts tested for CLANG with ifeq ($(CC), clang), failing to detect CLANG binaries with different names. Fix it by testing for the existence of __clang__ macro in the list of compiler defined macros. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-5-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 4 ++-- tools/perf/util/intel-pt-decoder/Build | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6a64c6bbd9a5..63f534a0902f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -148,7 +148,7 @@ ifndef DEBUG endif ifeq ($(DEBUG),0) -ifeq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 @@ -184,7 +184,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC), clang) + ifeq ($(CC_NO_CLANG), 1) PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) endif FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 7aca5d6d7e1f..10e0814bb8d2 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -25,6 +25,6 @@ $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/in CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -ifneq ($(CC), clang) +ifeq ($(CC_NO_CLANG), 1) CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init endif -- cgit v1.2.1 From 70ff7c6caa2f2cee4a7621f5cb3b73b0a38327f1 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros Date: Sun, 27 Aug 2017 00:54:42 -0700 Subject: perf tools: Pass full path of FEATURES_DUMP When building with an external FEATURES_DUMP, bpf complains that features dump file is not found. Fix it by passing full file path. 
Signed-off-by: David Carrillo-Cisneros Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Paul Turner Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20170827075442.108534-7-davidcc@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 58924eb0f40b..a5bf3100f812 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -240,7 +240,7 @@ endif ifeq ($(FEATURES_DUMP),) FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP) else -FEATURE_DUMP_EXPORT := $(FEATURES_DUMP) +FEATURE_DUMP_EXPORT := $(realpath $(FEATURES_DUMP)) endif export prefix bindir sharedir sysconfdir DESTDIR -- cgit v1.2.1 From 83bc9c371eaf21466f43b12d942b66c3f0d60ae5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Aug 2017 11:47:11 -0300 Subject: perf trace beauty: Beautify pkey_{alloc,free,mprotect} arguments Reuse 'mprotect' beautifiers for 'pkey_mprotect'. System wide tracing pkey_alloc, pkey_free and pkey_mprotect calls, with backtraces: # perf trace -e pkey_alloc,pkey_mprotect,pkey_free --max-stack=5 0.000 ( 0.011 ms): pkey/7818 pkey_alloc(init_val: DISABLE_ACCESS|DISABLE_WRITE) = -1 EINVAL Invalid argument syscall (/usr/lib64/libc-2.25.so) pkey_alloc (/home/acme/c/pkey) 0.022 ( 0.003 ms): pkey/7818 pkey_mprotect(start: 0x7f28c3890000, len: 4096, prot: READ|WRITE, pkey: -1) = 0 syscall (/usr/lib64/libc-2.25.so) pkey_mprotect (/home/acme/c/pkey) 0.030 ( 0.002 ms): pkey/7818 pkey_free(pkey: -1 ) = -1 EINVAL Invalid argument syscall (/usr/lib64/libc-2.25.so) pkey_free (/home/acme/c/pkey) The tools/include/uapi/asm-generic/mman-common.h file is used to find the access rights defines for the pkey_alloc syscall second argument. 
Since we have the detector of changes for the tools/include header files versus its kernel origin (include/uapi/asm-generic/mman-common.h), we'll get whatever new flag appears for that argument automatically. This method should be used in other cases where it is easy to generate those flags tables because the header has properly namespaced defines like PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE. Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Dave Hansen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-3xq5312qlks7wtfzv2sk3nct@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 12 +++++- tools/perf/builtin-trace.c | 8 ++++ tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 ++ tools/perf/trace/beauty/pkey_alloc.c | 50 ++++++++++++++++++++++ .../perf/trace/beauty/pkey_alloc_access_rights.sh | 10 +++++ 6 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/pkey_alloc.c create mode 100755 tools/perf/trace/beauty/pkey_alloc_access_rights.sh (limited to 'tools/perf') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a5bf3100f812..91ef44bfaf3e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -387,7 +387,8 @@ export INSTALL SHELL_PATH SHELL = $(SHELL_PATH) -beauty_ioctl_outdir := $(OUTPUT)trace/beauty/generated/ioctl +beauty_outdir := $(OUTPUT)trace/beauty/generated +beauty_ioctl_outdir := $(beauty_outdir)/ioctl drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c drm_hdr_dir := $(srctree)/tools/include/uapi/drm drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh @@ -398,6 +399,13 @@ _dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_ou $(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl) $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@ +pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c 
+asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ +pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh + +$(pkey_alloc_access_rights_array): $(asm_generic_hdr_dir)/mman-common.h $(pkey_alloc_access_rights_tbl) + $(Q)$(SHELL) '$(pkey_alloc_access_rights_tbl)' $(asm_generic_hdr_dir) > $@ + sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh @@ -528,6 +536,7 @@ __build-dir = $(subst $(OUTPUT),,$(dir $@)) build-dir = $(if $(__build-dir),$(__build-dir),.) prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ + $(pkey_alloc_access_rights_array) \ $(sndrv_pcm_ioctl_array) \ $(sndrv_ctl_ioctl_array) \ $(kvm_ioctl_array) \ @@ -803,6 +812,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ $(OUTPUT)pmu-events/pmu-events.c \ $(OUTPUT)$(drm_ioctl_array) \ + $(OUTPUT)$(pkey_alloc_access_rights_array) \ $(OUTPUT)$(sndrv_ctl_ioctl_array) \ $(OUTPUT)$(sndrv_pcm_ioctl_array) \ $(OUTPUT)$(kvm_ioctl_array) \ diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 91905839e386..d59cdadf3a79 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -693,6 +693,14 @@ static struct syscall_fmt { [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, + { .name = "pkey_alloc", + .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, + { .name = "pkey_free", + .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, + { .name = "pkey_mprotect", + .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, + [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, + [3] = { .scnprintf = SCA_INT, /* pkey 
*/ }, }, }, { .name = "poll", .timeout = true, }, { .name = "ppoll", .timeout = true, }, { .name = "pread", .alias = "pread64", }, diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 6f3f159f97e0..175d633c6b49 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -3,4 +3,5 @@ libperf-y += fcntl.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif +libperf-y += pkey_alloc.o libperf-y += statx.o diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index 47a36a8eb842..4b58581a6053 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -78,6 +78,9 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd +size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights + size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c new file mode 100644 index 000000000000..2ba784a3734a --- /dev/null +++ b/tools/perf/trace/beauty/pkey_alloc.c @@ -0,0 +1,50 @@ +/* + * trace/beauty/pkey_alloc.c + * + * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include "trace/beauty/beauty.h" +#include +#include + +static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size) +{ + int i, printed = 0; + +#include "trace/beauty/generated/pkey_alloc_access_rights_array.c" + static DEFINE_STRARRAY(pkey_alloc_access_rights); + + if (access_rights == 0) { + const char *s = strarray__pkey_alloc_access_rights.entries[0]; + if (s) + return scnprintf(bf, size, "%s", s); + return scnprintf(bf, size, "%d", 0); + } + + for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) { + int bit = 1 << (i - 1); + + if (!(access_rights & bit)) + continue; + + if (printed != 0) + printed += scnprintf(bf + printed, size - printed, "|"); + + if (strarray__pkey_alloc_access_rights.entries[i] != NULL) + printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]); + else + printed += scnprintf(bf + printed, size - printed, "0x%#", bit); + } + + return printed; +} + +size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned long cmd = arg->val; + + return pkey_alloc__scnprintf_access_rights(cmd, bf, size); +} diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh new file mode 100755 index 000000000000..62e51a02b839 --- /dev/null +++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +header_dir=$1 + +printf "static const char *pkey_alloc_access_rights[] = {\n" +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*' +egrep $regex ${header_dir}/mman-common.h | \ + sed -r "s/$regex/\2 \2 \1/g" | \ + sort | xargs printf "\t[%s ? 
(ilog2(%s) + 1) : 0] = \"%s\",\n" +printf "};\n" -- cgit v1.2.1 From 2c29461e273abaf149cf8220c3403e9d67dd8b61 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Tue, 29 Aug 2017 20:57:23 +0800 Subject: perf probe: Fix kprobe blacklist checking condition Since commit 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events"), 'perf probe' supports checking the blacklist of the functions which cannot be probed. But the checking condition is wrong: the end_addr of a symbol, which is the start_addr of the next symbol, must not be included. Committer notes: IOW make it match its kernel counterpart in kernel/kprobes.c: bool within_kprobe_blacklist(unsigned long addr) Each entry has as its end address not the symbol's last address, but the first address _outside_ that symbol, which for related functions, is the first address of the next symbol, like these from kernel/trace/trace_probe.c: 0xffffffffbd198df0-0xffffffffbd198e40 print_type_u8 0xffffffffbd198e40-0xffffffffbd198e90 print_type_u16 0xffffffffbd198e90-0xffffffffbd198ee0 print_type_u32 0xffffffffbd198ee0-0xffffffffbd198f30 print_type_u64 0xffffffffbd198f30-0xffffffffbd198f80 print_type_s8 0xffffffffbd198f80-0xffffffffbd198fd0 print_type_s16 0xffffffffbd198fd0-0xffffffffbd199020 print_type_s32 0xffffffffbd199020-0xffffffffbd199070 print_type_s64 0xffffffffbd199070-0xffffffffbd1990c0 print_type_x8 0xffffffffbd1990c0-0xffffffffbd199110 print_type_x16 0xffffffffbd199110-0xffffffffbd199160 print_type_x32 0xffffffffbd199160-0xffffffffbd1991b0 print_type_x64 But not always: 0xffffffffbd1997b0-0xffffffffbd1997c0 fetch_kernel_stack_address (kernel/trace/trace_probe.c) 0xffffffffbd1c57f0-0xffffffffbd1c58b0 __context_tracking_enter (kernel/context_tracking.c) Signed-off-by: Li Bin Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Fixes: 9aaf5a5f479b ("perf probe: Check kprobes blacklist when adding new events") Link: 
http://lkml.kernel.org/r/1504011443-7269-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index d7cd1142f4c6..b7aaf9b2294d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2395,7 +2395,7 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist, struct kprobe_blacklist_node *node; list_for_each_entry(node, blacklist, list) { - if (node->start <= address && address <= node->end) + if (node->start <= address && address < node->end) return node; } -- cgit v1.2.1 From b2f7605076d6cdd68162c42c34caadafbbe4c69f Mon Sep 17 00:00:00 2001 From: Li Bin Date: Mon, 5 Jun 2017 08:34:09 +0800 Subject: perf symbols: Fix plt entry calculation for ARM and AARCH64 On x86, the plt header size is the same as the plt entry size, and can be identified from shdr's sh_entsize of the plt. But we can't assume that the sh_entsize of the plt shdr is always the plt entry size on all architectures, and the plt header size may not be the same as the plt entry size on some architectures. On ARM, the plt header size is 20 bytes and the plt entry size is 12 bytes (not considering the FOUR_WORD_PLT case), according to the binutils implementation. The plt section is as follows: Disassembly of section .plt: 000004a0 <__cxa_finalize@plt-0x14>: 4a0: e52de004 push {lr} ; (str lr, [sp, #-4]!) 4a4: e59fe004 ldr lr, [pc, #4] ; 4b0 <_init+0x1c> 4a8: e08fe00e add lr, pc, lr 4ac: e5bef008 ldr pc, [lr, #8]! 4b0: 00008424 .word 0x00008424 000004b4 <__cxa_finalize@plt>: 4b4: e28fc600 add ip, pc, #0, 12 4b8: e28cca08 add ip, ip, #8, 20 ; 0x8000 4bc: e5bcf424 ldr pc, [ip, #1060]! ; 0x424 000004c0 : 4c0: e28fc600 add ip, pc, #0, 12 4c4: e28cca08 add ip, ip, #8, 20 ; 0x8000 4c8: e5bcf41c ldr pc, [ip, #1052]! 
; 0x41c On AARCH64, the plt header size is 32 bytes and the plt entry size is 16 bytes. The plt section is as follows: Disassembly of section .plt: 0000000000000560 <__cxa_finalize@plt-0x20>: 560: a9bf7bf0 stp x16, x30, [sp,#-16]! 564: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 568: f944be11 ldr x17, [x16,#2424] 56c: 9125e210 add x16, x16, #0x978 570: d61f0220 br x17 574: d503201f nop 578: d503201f nop 57c: d503201f nop 0000000000000580 <__cxa_finalize@plt>: 580: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 584: f944c211 ldr x17, [x16,#2432] 588: 91260210 add x16, x16, #0x980 58c: d61f0220 br x17 0000000000000590 <__gmon_start__@plt>: 590: 90000090 adrp x16, 10000 <__FRAME_END__+0xf8a8> 594: f944c611 ldr x17, [x16,#2440] 598: 91262210 add x16, x16, #0x988 59c: d61f0220 br x17 NOTES: In addition to ARM and AARCH64, other architectures, such as s390/alpha/mips/parisc/powerpc/sh/sparc/xtensa also need to consider this issue. Signed-off-by: Li Bin Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Alexis Berlemont Cc: David Tolnay Cc: Hanjun Guo Cc: Hemant Kumar Cc: Masami Hiramatsu Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Cc: zhangmengting@huawei.com Link: http://lkml.kernel.org/r/1496622849-21877-1-git-send-email-huawei.libin@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol-elf.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'tools/perf') diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a70479061fce..5c39f420111e 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -259,7 +259,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * { uint32_t nr_rel_entries, idx; GElf_Sym sym; - u64 plt_offset; + u64 plt_offset, plt_header_size, plt_entry_size; GElf_Shdr shdr_plt; struct symbol *f; GElf_Shdr shdr_rel_plt, shdr_dynsym; @@ -326,6 +326,23 @@ int
dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; plt_offset = shdr_plt.sh_offset; + switch (ehdr.e_machine) { + case EM_ARM: + plt_header_size = 20; + plt_entry_size = 12; + break; + + case EM_AARCH64: + plt_header_size = 32; + plt_entry_size = 16; + break; + + default: /* FIXME: s390/alpha/mips/parisc/poperpc/sh/sparc/xtensa need to be checked */ + plt_header_size = shdr_plt.sh_entsize; + plt_entry_size = shdr_plt.sh_entsize; + break; + } + plt_offset += plt_header_size; if (shdr_rel_plt.sh_type == SHT_RELA) { GElf_Rela pos_mem, *pos; @@ -335,7 +352,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -346,11 +362,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f) goto out_elf_end; + plt_offset += plt_entry_size; symbols__insert(&dso->symbols[map->type], f); ++nr; } @@ -361,7 +378,6 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * const char *elf_name = NULL; char *demangled = NULL; symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; gelf_getsym(syms, symidx, &sym); elf_name = elf_sym__name(&sym, symstrs); @@ -372,11 +388,12 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss, struct map * "%s@plt", elf_name); free(demangled); - f = symbol__new(plt_offset, shdr_plt.sh_entsize, + f = symbol__new(plt_offset, plt_entry_size, STB_GLOBAL, sympltname); if (!f) goto out_elf_end; + plt_offset += plt_entry_size; symbols__insert(&dso->symbols[map->type], f); ++nr; } -- cgit v1.2.1