summaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-stat.txt5
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/Makefile15
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h12
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S2
-rw-r--r--tools/perf/bench/mem-memcpy.c219
-rw-r--r--tools/perf/builtin-record.c33
-rw-r--r--tools/perf/builtin-script.c33
-rw-r--r--tools/perf/builtin-stat.c207
-rw-r--r--tools/perf/builtin-top.c6
-rw-r--r--tools/perf/command-list.txt2
-rw-r--r--tools/perf/feature-tests.mak4
-rw-r--r--tools/perf/util/debug.c42
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/event.c2
-rw-r--r--tools/perf/util/header.c11
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/include/asm/cpufeature.h9
-rw-r--r--tools/perf/util/include/asm/dwarf2.h11
-rw-r--r--tools/perf/util/include/linux/bitops.h5
-rw-r--r--tools/perf/util/include/linux/linkage.h13
-rw-r--r--tools/perf/util/parse-events.c12
-rw-r--r--tools/perf/util/probe-finder.h6
-rw-r--r--tools/perf/util/session.c295
-rw-r--r--tools/perf/util/session.h8
-rw-r--r--tools/perf/util/symbol.c6
-rw-r--r--tools/perf/util/ui/util.c16
28 files changed, 695 insertions, 287 deletions
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 4b3a2d46b437..c405bcad6ac7 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -53,6 +53,11 @@ comma-sperated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2
In per-thread mode, this option is ignored. The -a option is still necessary
to activate system-wide monitoring. Default is to count on all CPUs.
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
EXAMPLES
--------
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 8c7fc0c8f0b8..c12659d8cb26 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,7 @@ include/linux/stringify.h
lib/rbtree.c
include/linux/swab.h
arch/*/include/asm/unistd*.h
+arch/*/lib/memcpy*.S
include/linux/poison.h
include/linux/magic.h
include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a9c19d013147..d88137a4356e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -185,7 +185,10 @@ ifeq ($(ARCH),i386)
ARCH := x86
endif
ifeq ($(ARCH),x86_64)
+ RAW_ARCH := x86_64
ARCH := x86
+ ARCH_CFLAGS := -DARCH_X86_64
+ ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
endif
# CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -375,6 +378,7 @@ LIB_H += util/include/linux/prefetch.h
LIB_H += util/include/linux/rbtree.h
LIB_H += util/include/linux/string.h
LIB_H += util/include/linux/types.h
+LIB_H += util/include/linux/linkage.h
LIB_H += util/include/asm/asm-offsets.h
LIB_H += util/include/asm/bug.h
LIB_H += util/include/asm/byteorder.h
@@ -383,6 +387,8 @@ LIB_H += util/include/asm/swab.h
LIB_H += util/include/asm/system.h
LIB_H += util/include/asm/uaccess.h
LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
LIB_H += perf.h
LIB_H += util/cache.h
LIB_H += util/callchain.h
@@ -417,6 +423,7 @@ LIB_H += util/probe-finder.h
LIB_H += util/probe-event.h
LIB_H += util/pstack.h
LIB_H += util/cpumap.h
+LIB_H += $(ARCH_INCLUDE)
LIB_OBJS += $(OUTPUT)util/abspath.o
LIB_OBJS += $(OUTPUT)util/alias.o
@@ -472,6 +479,9 @@ BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+endif
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
@@ -507,7 +517,7 @@ PERFLIBS = $(LIB_FILE)
-include config.mak
ifndef NO_DWARF
-FLAGS_DWARF=$(ALL_CFLAGS) -I/usr/include/elfutils -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
+FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
NO_DWARF := 1
@@ -554,7 +564,7 @@ ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
else
- BASIC_CFLAGS += -I/usr/include/elfutils -DDWARF_SUPPORT
+ BASIC_CFLAGS += -DDWARF_SUPPORT
EXTLIBS += -lelf -ldw
LIB_OBJS += $(OUTPUT)util/probe-finder.o
endif # PERF_HAVE_DWARF_REGS
@@ -898,6 +908,7 @@ BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \
LIB_OBJS += $(COMPAT_OBJS)
ALL_CFLAGS += $(BASIC_CFLAGS)
+ALL_CFLAGS += $(ARCH_CFLAGS)
ALL_LDFLAGS += $(BASIC_LDFLAGS)
export TAR INSTALL DESTDIR SHELL_PATH
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 000000000000..a72e36cb5394
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) \
+ extern void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 000000000000..d588b87696fc
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,4 @@
+
+MEMCPY_FN(__memcpy,
+ "x86-64-unrolled",
+ "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 000000000000..a57b66e853c2
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,2 @@
+
+#include "../../../arch/x86/lib/memcpy_64.S"
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 38dae7465142..db82021f4b91 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -12,6 +12,7 @@
#include "../util/parse-options.h"
#include "../util/header.h"
#include "bench.h"
+#include "mem-memcpy-arch.h"
#include <stdio.h>
#include <stdlib.h>
@@ -23,8 +24,10 @@
static const char *length_str = "1MB";
static const char *routine = "default";
-static bool use_clock = false;
+static bool use_clock;
static int clock_fd;
+static bool only_prefault;
+static bool no_prefault;
static const struct option options[] = {
OPT_STRING('l', "length", &length_str, "1MB",
@@ -34,19 +37,33 @@ static const struct option options[] = {
"Specify routine to copy"),
OPT_BOOLEAN('c', "clock", &use_clock,
"Use CPU clock for measuring"),
+ OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+ "Show only the result with page faults before memcpy()"),
+ OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+ "Show only the result without page faults before memcpy()"),
OPT_END()
};
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+
struct routine {
const char *name;
const char *desc;
- void * (*fn)(void *dst, const void *src, size_t len);
+ memcpy_t fn;
};
struct routine routines[] = {
{ "default",
"Default memcpy() provided by glibc",
memcpy },
+#ifdef ARCH_X86_64
+
+#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memcpy-x86-64-asm-def.h"
+#undef MEMCPY_FN
+
+#endif
+
{ NULL,
NULL,
NULL }
@@ -89,29 +106,98 @@ static double timeval2double(struct timeval *ts)
(double)ts->tv_usec / (double)1000000;
}
+static void alloc_mem(void **dst, void **src, size_t length)
+{
+ *dst = zalloc(length);
+ if (!dst)
+ die("memory allocation failed - maybe length is too large?\n");
+
+ *src = zalloc(length);
+ if (!src)
+ die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
+{
+ u64 clock_start = 0ULL, clock_end = 0ULL;
+ void *src = NULL, *dst = NULL;
+
+ alloc_mem(&src, &dst, len);
+
+ if (prefault)
+ fn(dst, src, len);
+
+ clock_start = get_clock();
+ fn(dst, src, len);
+ clock_end = get_clock();
+
+ free(src);
+ free(dst);
+ return clock_end - clock_start;
+}
+
+static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+{
+ struct timeval tv_start, tv_end, tv_diff;
+ void *src = NULL, *dst = NULL;
+
+ alloc_mem(&src, &dst, len);
+
+ if (prefault)
+ fn(dst, src, len);
+
+ BUG_ON(gettimeofday(&tv_start, NULL));
+ fn(dst, src, len);
+ BUG_ON(gettimeofday(&tv_end, NULL));
+
+ timersub(&tv_end, &tv_start, &tv_diff);
+
+ free(src);
+ free(dst);
+ return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do { \
+ if (x < K) \
+ printf(" %14lf B/Sec", x); \
+ else if (x < K * K) \
+ printf(" %14lfd KB/Sec", x / K); \
+ else if (x < K * K * K) \
+ printf(" %14lf MB/Sec", x / K / K); \
+ else \
+ printf(" %14lf GB/Sec", x / K / K / K); \
+ } while (0)
+
int bench_mem_memcpy(int argc, const char **argv,
const char *prefix __used)
{
int i;
- void *dst, *src;
- size_t length;
- double bps = 0.0;
- struct timeval tv_start, tv_end, tv_diff;
- u64 clock_start, clock_end, clock_diff;
+ size_t len;
+ double result_bps[2];
+ u64 result_clock[2];
- clock_start = clock_end = clock_diff = 0ULL;
argc = parse_options(argc, argv, options,
bench_mem_memcpy_usage, 0);
- tv_diff.tv_sec = 0;
- tv_diff.tv_usec = 0;
- length = (size_t)perf_atoll((char *)length_str);
+ if (use_clock)
+ init_clock();
+
+ len = (size_t)perf_atoll((char *)length_str);
- if ((s64)length <= 0) {
+ result_clock[0] = result_clock[1] = 0ULL;
+ result_bps[0] = result_bps[1] = 0.0;
+
+ if ((s64)len <= 0) {
fprintf(stderr, "Invalid length:%s\n", length_str);
return 1;
}
+ /* same to without specifying either of prefault and no-prefault */
+ if (only_prefault && no_prefault)
+ only_prefault = no_prefault = false;
+
for (i = 0; routines[i].name; i++) {
if (!strcmp(routines[i].name, routine))
break;
@@ -126,61 +212,80 @@ int bench_mem_memcpy(int argc, const char **argv,
return 1;
}
- dst = zalloc(length);
- if (!dst)
- die("memory allocation failed - maybe length is too large?\n");
-
- src = zalloc(length);
- if (!src)
- die("memory allocation failed - maybe length is too large?\n");
-
- if (bench_format == BENCH_FORMAT_DEFAULT) {
- printf("# Copying %s Bytes from %p to %p ...\n\n",
- length_str, src, dst);
- }
-
- if (use_clock) {
- init_clock();
- clock_start = get_clock();
- } else {
- BUG_ON(gettimeofday(&tv_start, NULL));
- }
-
- routines[i].fn(dst, src, length);
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Copying %s Bytes ...\n\n", length_str);
- if (use_clock) {
- clock_end = get_clock();
- clock_diff = clock_end - clock_start;
+ if (!only_prefault && !no_prefault) {
+ /* show both of results */
+ if (use_clock) {
+ result_clock[0] =
+ do_memcpy_clock(routines[i].fn, len, false);
+ result_clock[1] =
+ do_memcpy_clock(routines[i].fn, len, true);
+ } else {
+ result_bps[0] =
+ do_memcpy_gettimeofday(routines[i].fn,
+ len, false);
+ result_bps[1] =
+ do_memcpy_gettimeofday(routines[i].fn,
+ len, true);
+ }
} else {
- BUG_ON(gettimeofday(&tv_end, NULL));
- timersub(&tv_end, &tv_start, &tv_diff);
- bps = (double)((double)length / timeval2double(&tv_diff));
+ if (use_clock) {
+ result_clock[pf] =
+ do_memcpy_clock(routines[i].fn,
+ len, only_prefault);
+ } else {
+ result_bps[pf] =
+ do_memcpy_gettimeofday(routines[i].fn,
+ len, only_prefault);
+ }
}
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
- if (use_clock) {
- printf(" %14lf Clock/Byte\n",
- (double)clock_diff / (double)length);
- } else {
- if (bps < K)
- printf(" %14lf B/Sec\n", bps);
- else if (bps < K * K)
- printf(" %14lfd KB/Sec\n", bps / 1024);
- else if (bps < K * K * K)
- printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
- else {
- printf(" %14lf GB/Sec\n",
- bps / 1024 / 1024 / 1024);
+ if (!only_prefault && !no_prefault) {
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)result_clock[0]
+ / (double)len);
+ printf(" %14lf Clock/Byte (with prefault)\n",
+ (double)result_clock[1]
+ / (double)len);
+ } else {
+ print_bps(result_bps[0]);
+ printf("\n");
+ print_bps(result_bps[1]);
+ printf(" (with prefault)\n");
}
+ } else {
+ if (use_clock) {
+ printf(" %14lf Clock/Byte",
+ (double)result_clock[pf]
+ / (double)len);
+ } else
+ print_bps(result_bps[pf]);
+
+ printf("%s\n", only_prefault ? " (with prefault)" : "");
}
break;
case BENCH_FORMAT_SIMPLE:
- if (use_clock) {
- printf("%14lf\n",
- (double)clock_diff / (double)length);
- } else
- printf("%lf\n", bps);
+ if (!only_prefault && !no_prefault) {
+ if (use_clock) {
+ printf("%lf %lf\n",
+ (double)result_clock[0] / (double)len,
+ (double)result_clock[1] / (double)len);
+ } else {
+ printf("%lf %lf\n",
+ result_bps[0], result_bps[1]);
+ }
+ } else {
+ if (use_clock) {
+ printf("%lf\n", (double)result_clock[pf]
+ / (double)len);
+ } else
+ printf("%lf\n", result_bps[pf]);
+ }
break;
default:
/* reaching this means there's some disaster: */
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 93bd2ff001fb..024e1441d76b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -61,6 +61,7 @@ static bool inherit_stat = false;
static bool no_samples = false;
static bool sample_address = false;
static bool no_buildid = false;
+static bool no_buildid_cache = false;
static long samples = 0;
static u64 bytes_written = 0;
@@ -326,7 +327,7 @@ try_again:
goto try_again;
}
printf("\n");
- error("perfcounter syscall returned with %d (%s)\n",
+ error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
fd[nr_cpu][counter][thread_index], strerror(err));
#if defined(__i386__) || defined(__x86_64__)
@@ -437,7 +438,8 @@ static void atexit_header(void)
if (!pipe_output) {
session->header.data_size += bytes_written;
- process_buildids();
+ if (!no_buildid)
+ process_buildids();
perf_header__write(&session->header, output, true);
perf_session__delete(session);
symbol__exit();
@@ -557,6 +559,9 @@ static int __cmd_record(int argc, const char **argv)
return -1;
}
+ if (!no_buildid)
+ perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+
if (!file_new) {
err = perf_header__read(session, output);
if (err < 0)
@@ -697,17 +702,18 @@ static int __cmd_record(int argc, const char **argv)
if (err < 0)
err = event__synthesize_kernel_mmap(process_synthesized_event,
session, machine, "_stext");
- if (err < 0) {
- pr_err("Couldn't record kernel reference relocation symbol.\n");
- return err;
- }
+ if (err < 0)
+ pr_err("Couldn't record kernel reference relocation symbol\n"
+ "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+ "Check /proc/kallsyms permission or run as root.\n");
err = event__synthesize_modules(process_synthesized_event,
session, machine);
- if (err < 0) {
- pr_err("Couldn't record kernel reference relocation symbol.\n");
- return err;
- }
+ if (err < 0)
+ pr_err("Couldn't record kernel module information.\n"
+ "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+ "Check /proc/modules permission or run as root.\n");
+
if (perf_guest)
perf_session__process_machines(session, event__synthesize_guest_os);
@@ -830,8 +836,10 @@ const struct option record_options[] = {
"Sample addresses"),
OPT_BOOLEAN('n', "no-samples", &no_samples,
"don't sample"),
- OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid,
+ OPT_BOOLEAN('N', "no-buildid-cache", &no_buildid_cache,
"do not update the buildid cache"),
+ OPT_BOOLEAN('B', "no-buildid", &no_buildid,
+ "do not collect buildids in perf.data"),
OPT_END()
};
@@ -856,7 +864,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
}
symbol__init();
- if (no_buildid)
+
+ if (no_buildid_cache || no_buildid)
disable_buildid_cache();
if (!nr_counters) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 492d19d86534..4539551ab40e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -301,17 +301,34 @@ static int parse_scriptname(const struct option *opt __used,
return 0;
}
-#define for_each_lang(scripts_dir, lang_dirent, lang_next) \
+/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
+static int is_directory(const char *base_path, const struct dirent *dent)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+ sprintf(path, "%s/%s", base_path, dent->d_name);
+ if (stat(path, &st))
+ return 0;
+
+ return S_ISDIR(st.st_mode);
+}
+
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\
while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \
lang_next) \
- if (lang_dirent.d_type == DT_DIR && \
+ if ((lang_dirent.d_type == DT_DIR || \
+ (lang_dirent.d_type == DT_UNKNOWN && \
+ is_directory(scripts_path, &lang_dirent))) && \
(strcmp(lang_dirent.d_name, ".")) && \
(strcmp(lang_dirent.d_name, "..")))
-#define for_each_script(lang_dir, script_dirent, script_next) \
+#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\
while (!readdir_r(lang_dir, &script_dirent, &script_next) && \
script_next) \
- if (script_dirent.d_type != DT_DIR)
+ if (script_dirent.d_type != DT_DIR && \
+ (script_dirent.d_type != DT_UNKNOWN || \
+ !is_directory(lang_path, &script_dirent)))
#define RECORD_SUFFIX "-record"
@@ -466,14 +483,14 @@ static int list_available_scripts(const struct option *opt __used,
if (!scripts_dir)
return -1;
- for_each_lang(scripts_dir, lang_dirent, lang_next) {
+ for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
lang_dirent.d_name);
lang_dir = opendir(lang_path);
if (!lang_dir)
continue;
- for_each_script(lang_dir, script_dirent, script_next) {
+ for_each_script(lang_path, lang_dir, script_dirent, script_next) {
script_root = strdup(script_dirent.d_name);
str = ends_with(script_root, REPORT_SUFFIX);
if (str) {
@@ -514,14 +531,14 @@ static char *get_script_path(const char *script_root, const char *suffix)
if (!scripts_dir)
return NULL;
- for_each_lang(scripts_dir, lang_dirent, lang_next) {
+ for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) {
snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
lang_dirent.d_name);
lang_dir = opendir(lang_path);
if (!lang_dir)
continue;
- for_each_script(lang_dir, script_dirent, script_next) {
+ for_each_script(lang_path, lang_dir, script_dirent, script_next) {
__script_root = strdup(script_dirent.d_name);
str = ends_with(__script_root, suffix);
if (str) {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6b4d44f9502..970a7f2a083d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -75,6 +75,7 @@ static int run_idx = 0;
static int run_count = 1;
static bool no_inherit = false;
static bool scale = true;
+static bool no_aggr = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
static pid_t *all_tids = NULL;
@@ -89,6 +90,12 @@ static int *fd[MAX_NR_CPUS][MAX_COUNTERS];
static int event_scaled[MAX_COUNTERS];
+static struct {
+ u64 val;
+ u64 ena;
+ u64 run;
+} cpu_counts[MAX_NR_CPUS][MAX_COUNTERS];
+
static volatile int done = 0;
struct stats
@@ -136,19 +143,19 @@ static double stddev_stats(struct stats *stats)
}
struct stats event_res_stats[MAX_COUNTERS][3];
-struct stats runtime_nsecs_stats;
+struct stats runtime_nsecs_stats[MAX_NR_CPUS];
+struct stats runtime_cycles_stats[MAX_NR_CPUS];
+struct stats runtime_branches_stats[MAX_NR_CPUS];
struct stats walltime_nsecs_stats;
-struct stats runtime_cycles_stats;
-struct stats runtime_branches_stats;
#define MATCH_EVENT(t, c, counter) \
(attrs[counter].type == PERF_TYPE_##t && \
attrs[counter].config == PERF_COUNT_##c)
#define ERR_PERF_OPEN \
-"Error: counter %d, sys_perf_event_open() syscall returned with %d (%s)\n"
+"counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information."
-static int create_perf_stat_counter(int counter)
+static int create_perf_stat_counter(int counter, bool *perm_err)
{
struct perf_event_attr *attr = attrs + counter;
int thread;
@@ -164,11 +171,14 @@ static int create_perf_stat_counter(int counter)
for (cpu = 0; cpu < nr_cpus; cpu++) {
fd[cpu][counter][0] = sys_perf_event_open(attr,
-1, cpumap[cpu], -1, 0);
- if (fd[cpu][counter][0] < 0)
- pr_debug(ERR_PERF_OPEN, counter,
+ if (fd[cpu][counter][0] < 0) {
+ if (errno == EPERM || errno == EACCES)
+ *perm_err = true;
+ error(ERR_PERF_OPEN, counter,
fd[cpu][counter][0], strerror(errno));
- else
+ } else {
++ncreated;
+ }
}
} else {
attr->inherit = !no_inherit;
@@ -179,12 +189,15 @@ static int create_perf_stat_counter(int counter)
for (thread = 0; thread < thread_num; thread++) {
fd[0][counter][thread] = sys_perf_event_open(attr,
all_tids[thread], -1, -1, 0);
- if (fd[0][counter][thread] < 0)
- pr_debug(ERR_PERF_OPEN, counter,
+ if (fd[0][counter][thread] < 0) {
+ if (errno == EPERM || errno == EACCES)
+ *perm_err = true;
+ error(ERR_PERF_OPEN, counter,
fd[0][counter][thread],
strerror(errno));
- else
+ } else {
++ncreated;
+ }
}
}
@@ -205,8 +218,9 @@ static inline int nsec_counter(int counter)
/*
* Read out the results of a single counter:
+ * aggregate counts across CPUs in system-wide mode
*/
-static void read_counter(int counter)
+static void read_counter_aggr(int counter)
{
u64 count[3], single_count[3];
int cpu;
@@ -264,11 +278,58 @@ static void read_counter(int counter)
* Save the full runtime - to allow normalization during printout:
*/
if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
- update_stats(&runtime_nsecs_stats, count[0]);
+ update_stats(&runtime_nsecs_stats[0], count[0]);
if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
- update_stats(&runtime_cycles_stats, count[0]);
+ update_stats(&runtime_cycles_stats[0], count[0]);
if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
- update_stats(&runtime_branches_stats, count[0]);
+ update_stats(&runtime_branches_stats[0], count[0]);
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static void read_counter(int counter)
+{
+ u64 count[3];
+ int cpu;
+ size_t res, nv;
+
+ count[0] = count[1] = count[2] = 0;
+
+ nv = scale ? 3 : 1;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+
+ if (fd[cpu][counter][0] < 0)
+ continue;
+
+ res = read(fd[cpu][counter][0], count, nv * sizeof(u64));
+
+ assert(res == nv * sizeof(u64));
+
+ close(fd[cpu][counter][0]);
+ fd[cpu][counter][0] = -1;
+
+ if (scale) {
+ if (count[2] == 0) {
+ count[0] = 0;
+ } else if (count[2] < count[1]) {
+ count[0] = (unsigned long long)
+ ((double)count[0] * count[1] / count[2] + 0.5);
+ }
+ }
+ cpu_counts[cpu][counter].val = count[0]; /* scaled count */
+ cpu_counts[cpu][counter].ena = count[1];
+ cpu_counts[cpu][counter].run = count[2];
+
+ if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+ update_stats(&runtime_nsecs_stats[cpu], count[0]);
+ if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
+ update_stats(&runtime_cycles_stats[cpu], count[0]);
+ if (MATCH_EVENT(HARDWARE, HW_BRANCH_INSTRUCTIONS, counter))
+ update_stats(&runtime_branches_stats[cpu], count[0]);
+ }
}
static int run_perf_stat(int argc __used, const char **argv)
@@ -277,6 +338,7 @@ static int run_perf_stat(int argc __used, const char **argv)
int status = 0;
int counter, ncreated = 0;
int child_ready_pipe[2], go_pipe[2];
+ bool perm_err = false;
const bool forks = (argc > 0);
char buf;
@@ -335,12 +397,15 @@ static int run_perf_stat(int argc __used, const char **argv)
}
for (counter = 0; counter < nr_counters; counter++)
- ncreated += create_perf_stat_counter(counter);
-
- if (ncreated == 0) {
- pr_err("No permission to collect %sstats.\n"
- "Consider tweaking /proc/sys/kernel/perf_event_paranoid.\n",
- system_wide ? "system-wide " : "");
+ ncreated += create_perf_stat_counter(counter, &perm_err);
+
+ if (ncreated < nr_counters) {
+ if (perm_err)
+ error("You may not have permission to collect %sstats.\n"
+ "\t Consider tweaking"
+ " /proc/sys/kernel/perf_event_paranoid or running as root.",
+ system_wide ? "system-wide " : "");
+ die("Not all events could be opened.\n");
if (child_pid != -1)
kill(child_pid, SIGTERM);
return -1;
@@ -362,9 +427,13 @@ static int run_perf_stat(int argc __used, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0);
- for (counter = 0; counter < nr_counters; counter++)
- read_counter(counter);
-
+ if (no_aggr) {
+ for (counter = 0; counter < nr_counters; counter++)
+ read_counter(counter);
+ } else {
+ for (counter = 0; counter < nr_counters; counter++)
+ read_counter_aggr(counter);
+ }
return WEXITSTATUS(status);
}
@@ -377,11 +446,15 @@ static void print_noise(int counter, double avg)
100 * stddev_stats(&event_res_stats[counter][0]) / avg);
}
-static void nsec_printout(int counter, double avg)
+static void nsec_printout(int cpu, int counter, double avg)
{
double msecs = avg / 1e6;
- fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter));
+ if (no_aggr)
+ fprintf(stderr, "CPU%-4d %18.6f %-24s",
+ cpumap[cpu], msecs, event_name(counter));
+ else
+ fprintf(stderr, " %18.6f %-24s", msecs, event_name(counter));
if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
fprintf(stderr, " # %10.3f CPUs ",
@@ -389,33 +462,41 @@ static void nsec_printout(int counter, double avg)
}
}
-static void abs_printout(int counter, double avg)
+static void abs_printout(int cpu, int counter, double avg)
{
double total, ratio = 0.0;
+ char cpustr[16] = { '\0', };
+
+ if (no_aggr)
+ sprintf(cpustr, "CPU%-4d", cpumap[cpu]);
+ else
+ cpu = 0;
if (big_num)
- fprintf(stderr, " %'18.0f %-24s", avg, event_name(counter));
+ fprintf(stderr, "%s %'18.0f %-24s",
+ cpustr, avg, event_name(counter));
else
- fprintf(stderr, " %18.0f %-24s", avg, event_name(counter));
+ fprintf(stderr, "%s %18.0f %-24s",
+ cpustr, avg, event_name(counter));
if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
- total = avg_stats(&runtime_cycles_stats);
+ total = avg_stats(&runtime_cycles_stats[cpu]);
if (total)
ratio = avg / total;
fprintf(stderr, " # %10.3f IPC ", ratio);
} else if (MATCH_EVENT(HARDWARE, HW_BRANCH_MISSES, counter) &&
- runtime_branches_stats.n != 0) {
- total = avg_stats(&runtime_branches_stats);
+ runtime_branches_stats[cpu].n != 0) {
+ total = avg_stats(&runtime_branches_stats[cpu]);
if (total)
ratio = avg * 100 / total;
fprintf(stderr, " # %10.3f %% ", ratio);
- } else if (runtime_nsecs_stats.n != 0) {
- total = avg_stats(&runtime_nsecs_stats);
+ } else if (runtime_nsecs_stats[cpu].n != 0) {
+ total = avg_stats(&runtime_nsecs_stats[cpu]);
if (total)
ratio = 1000.0 * avg / total;
@@ -426,8 +507,9 @@ static void abs_printout(int counter, double avg)
/*
* Print out the results of a single counter:
+ * aggregated counts in system-wide mode
*/
-static void print_counter(int counter)
+static void print_counter_aggr(int counter)
{
double avg = avg_stats(&event_res_stats[counter][0]);
int scaled = event_scaled[counter];
@@ -439,9 +521,9 @@ static void print_counter(int counter)
}
if (nsec_counter(counter))
- nsec_printout(counter, avg);
+ nsec_printout(-1, counter, avg);
else
- abs_printout(counter, avg);
+ abs_printout(-1, counter, avg);
print_noise(counter, avg);
@@ -458,6 +540,42 @@ static void print_counter(int counter)
fprintf(stderr, "\n");
}
+/*
+ * Print out the results of a single counter:
+ * does not use aggregated count in system-wide
+ */
+static void print_counter(int counter)
+{
+ u64 ena, run, val;
+ int cpu;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ val = cpu_counts[cpu][counter].val;
+ ena = cpu_counts[cpu][counter].ena;
+ run = cpu_counts[cpu][counter].run;
+ if (run == 0 || ena == 0) {
+ fprintf(stderr, "CPU%-4d %18s %-24s", cpumap[cpu],
+ "<not counted>", event_name(counter));
+
+ fprintf(stderr, "\n");
+ continue;
+ }
+
+ if (nsec_counter(counter))
+ nsec_printout(cpu, counter, val);
+ else
+ abs_printout(cpu, counter, val);
+
+ print_noise(counter, 1.0);
+
+ if (run != ena) {
+ fprintf(stderr, " (scaled from %.2f%%)",
+ 100.0 * run / ena);
+ }
+ fprintf(stderr, "\n");
+ }
+}
+
static void print_stat(int argc, const char **argv)
{
int i, counter;
@@ -480,8 +598,13 @@ static void print_stat(int argc, const char **argv)
fprintf(stderr, " (%d runs)", run_count);
fprintf(stderr, ":\n\n");
- for (counter = 0; counter < nr_counters; counter++)
- print_counter(counter);
+ if (no_aggr) {
+ for (counter = 0; counter < nr_counters; counter++)
+ print_counter(counter);
+ } else {
+ for (counter = 0; counter < nr_counters; counter++)
+ print_counter_aggr(counter);
+ }
fprintf(stderr, "\n");
fprintf(stderr, " %18.9f seconds time elapsed",
@@ -545,6 +668,8 @@ static const struct option options[] = {
"print large numbers with thousands\' separators"),
OPT_STRING('C', "cpu", &cpu_list, "cpu",
"list of cpus to monitor in system-wide"),
+ OPT_BOOLEAN('A', "no-aggr", &no_aggr,
+ "disable CPU count aggregation"),
OPT_END()
};
@@ -562,6 +687,10 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
if (run_count <= 0)
usage_with_options(stat_usage, options);
+ /* no_aggr is for system-wide only */
+ if (no_aggr && !system_wide)
+ usage_with_options(stat_usage, options);
+
/* Set attrs and nr_counters if no event is selected and !null_run */
if (!null_run && !nr_counters) {
memcpy(attrs, default_attrs, sizeof(default_attrs));
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd625808c2a5..3d2b47d5121a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1214,7 +1214,9 @@ try_again:
int err = errno;
if (err == EPERM || err == EACCES)
- die("No permission - are you root?\n");
+ die("Permission error - are you root?\n"
+ "\t Consider tweaking"
+ " /proc/sys/kernel/perf_event_paranoid.\n");
/*
* If it's cycles then fall back to hrtimer
* based cpu-clock-tick sw counter, which
@@ -1231,7 +1233,7 @@ try_again:
goto try_again;
}
printf("\n");
- error("perfcounter syscall returned with %d (%s)\n",
+ error("sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information.\n",
fd[i][counter][thread_index], strerror(err));
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
exit(-1);
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index 949d77fc0b97..16b5088cf8f4 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -16,7 +16,7 @@ perf-report mainporcelain common
perf-stat mainporcelain common
perf-timechart mainporcelain common
perf-top mainporcelain common
-perf-trace mainporcelain common
+perf-script mainporcelain common
perf-probe mainporcelain common
perf-kmem mainporcelain common
perf-lock mainporcelain common
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index b253db634f04..b041ca67a2cb 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -9,8 +9,8 @@ endef
ifndef NO_DWARF
define SOURCE_DWARF
#include <dwarf.h>
-#include <libdw.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
#ifndef _ELFUTILS_PREREQ
#error
#endif
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index c8d81b00089d..01bbe8ecec3f 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -46,20 +46,16 @@ int dump_printf(const char *fmt, ...)
return ret;
}
-static int dump_printf_color(const char *fmt, const char *color, ...)
+#ifdef NO_NEWT_SUPPORT
+void ui__warning(const char *format, ...)
{
va_list args;
- int ret = 0;
- if (dump_trace) {
- va_start(args, color);
- ret = color_vfprintf(stdout, color, fmt, args);
- va_end(args);
- }
-
- return ret;
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
}
-
+#endif
void trace_event(event_t *event)
{
@@ -70,29 +66,29 @@ void trace_event(event_t *event)
if (!dump_trace)
return;
- dump_printf(".");
- dump_printf_color("\n. ... raw event: size %d bytes\n", color,
- event->header.size);
+ printf(".");
+ color_fprintf(stdout, color, "\n. ... raw event: size %d bytes\n",
+ event->header.size);
for (i = 0; i < event->header.size; i++) {
if ((i & 15) == 0) {
- dump_printf(".");
- dump_printf_color(" %04x: ", color, i);
+ printf(".");
+ color_fprintf(stdout, color, " %04x: ", i);
}
- dump_printf_color(" %02x", color, raw_event[i]);
+ color_fprintf(stdout, color, " %02x", raw_event[i]);
if (((i & 15) == 15) || i == event->header.size-1) {
- dump_printf_color(" ", color);
+ color_fprintf(stdout, color, " ");
for (j = 0; j < 15-(i & 15); j++)
- dump_printf_color(" ", color);
+ color_fprintf(stdout, color, " ");
for (j = i & ~15; j <= i; j++) {
- dump_printf_color("%c", color,
- isprint(raw_event[j]) ?
- raw_event[j] : '.');
+ color_fprintf(stdout, color, "%c",
+ isprint(raw_event[j]) ?
+ raw_event[j] : '.');
}
- dump_printf_color("\n", color);
+ color_fprintf(stdout, color, "\n");
}
}
- dump_printf(".\n");
+ printf(".\n");
}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
index 7b514082bbaf..ca35fd66b5df 100644
--- a/tools/perf/util/debug.h
+++ b/tools/perf/util/debug.h
@@ -35,4 +35,6 @@ int ui_helpline__show_help(const char *format, va_list ap);
#include "ui/progress.h"
#endif
+void ui__warning(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index dab9e754a281..7260db75b93d 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -392,7 +392,7 @@ static void event_set_kernel_mmap_len(struct map **maps, event_t *self)
* a zero sized synthesized MMAP event for the kernel.
*/
if (maps[MAP__FUNCTION]->end == 0)
- maps[MAP__FUNCTION]->end = ~0UL;
+ maps[MAP__FUNCTION]->end = ~0ULL;
}
static int event__process_kernel_mmap(event_t *self,
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index d7e67b167ea3..f65d7dc127b6 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -152,6 +152,11 @@ void perf_header__set_feat(struct perf_header *self, int feat)
set_bit(feat, self->adds_features);
}
+void perf_header__clear_feat(struct perf_header *self, int feat)
+{
+ clear_bit(feat, self->adds_features);
+}
+
bool perf_header__has_feat(const struct perf_header *self, int feat)
{
return test_bit(feat, self->adds_features);
@@ -431,8 +436,10 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
int idx = 0, err;
session = container_of(self, struct perf_session, header);
- if (perf_session__read_build_ids(session, true))
- perf_header__set_feat(self, HEADER_BUILD_ID);
+
+ if (perf_header__has_feat(self, HEADER_BUILD_ID &&
+ !perf_session__read_build_ids(session, true)))
+ perf_header__clear_feat(self, HEADER_BUILD_ID);
nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 402ac2454cf8..ed550bffd655 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -84,6 +84,7 @@ u64 perf_header__sample_type(struct perf_header *header);
struct perf_event_attr *
perf_header__find_attr(u64 id, struct perf_header *header);
void perf_header__set_feat(struct perf_header *self, int feat);
+void perf_header__clear_feat(struct perf_header *self, int feat);
bool perf_header__has_feat(const struct perf_header *self, int feat);
int perf_header__process_sections(struct perf_header *self, int fd,
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000000..acffd5e4d1d4
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,9 @@
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000000..bb4198e7837a
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,11 @@
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+
+#endif /* PERF_DWARF2_H */
+
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index bb4ac2e05385..8be0b968ca0b 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -13,6 +13,11 @@ static inline void set_bit(int nr, unsigned long *addr)
addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+ addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
+}
+
static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
{
return ((1UL << (nr % BITS_PER_LONG)) &
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000000..06387cffe125
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,13 @@
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name) \
+ .globl name; \
+ name:
+
+#define ENDPROC(name)
+
+#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4af5bd59cfd1..c305305a3884 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -434,7 +434,7 @@ parse_single_tracepoint_event(char *sys_name,
id = atoll(id_buf);
attr->config = id;
attr->type = PERF_TYPE_TRACEPOINT;
- *strp = evt_name + evt_length;
+ *strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
attr->sample_type |= PERF_SAMPLE_RAW;
attr->sample_type |= PERF_SAMPLE_TIME;
@@ -495,7 +495,7 @@ static enum event_result parse_tracepoint_event(const char **strp,
struct perf_event_attr *attr)
{
const char *evt_name;
- char *flags;
+ char *flags = NULL, *comma_loc;
char sys_name[MAX_EVENT_LENGTH];
unsigned int sys_length, evt_length;
@@ -514,6 +514,11 @@ static enum event_result parse_tracepoint_event(const char **strp,
sys_name[sys_length] = '\0';
evt_name = evt_name + 1;
+ comma_loc = strchr(evt_name, ',');
+ if (comma_loc) {
+ /* take the event name up to the comma */
+ evt_name = strndup(evt_name, comma_loc - evt_name);
+ }
flags = strchr(evt_name, ':');
if (flags) {
/* split it out: */
@@ -524,9 +529,8 @@ static enum event_result parse_tracepoint_event(const char **strp,
evt_length = strlen(evt_name);
if (evt_length >= MAX_EVENT_LENGTH)
return EVT_FAILED;
-
if (strpbrk(evt_name, "*?")) {
- *strp = evt_name + evt_length;
+ *strp += strlen(sys_name) + evt_length;
return parse_multiple_tracepoint_event(sys_name, evt_name,
flags);
} else
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index bba69d455699..beaefc3c1223 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,9 +34,9 @@ extern int find_available_vars_at(int fd, struct perf_probe_event *pev,
bool externs);
#include <dwarf.h>
-#include <libdw.h>
-#include <libdwfl.h>
-#include <version.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
struct probe_finder {
struct perf_probe_event *pev; /* Target probe event */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index fa9d652c2dc3..52672dad1fe9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -101,10 +101,20 @@ struct perf_session *perf_session__new(const char *filename, int mode, bool forc
INIT_LIST_HEAD(&self->dead_threads);
self->hists_tree = RB_ROOT;
self->last_match = NULL;
- self->mmap_window = 32;
+ /*
+ * On 64bit we can mmap the data file in one go. No need for tiny mmap
+ * slices. On 32bit we use 32MB.
+ */
+#if BITS_PER_LONG == 64
+ self->mmap_window = ULLONG_MAX;
+#else
+ self->mmap_window = 32 * 1024 * 1024ULL;
+#endif
self->machines = RB_ROOT;
self->repipe = repipe;
- INIT_LIST_HEAD(&self->ordered_samples.samples_head);
+ INIT_LIST_HEAD(&self->ordered_samples.samples);
+ INIT_LIST_HEAD(&self->ordered_samples.sample_cache);
+ INIT_LIST_HEAD(&self->ordered_samples.to_free);
machine__init(&self->host_machine, "", HOST_KERNEL_ID);
if (mode == O_RDONLY) {
@@ -262,7 +272,7 @@ static void perf_event_ops__fill_defaults(struct perf_event_ops *handler)
if (handler->exit == NULL)
handler->exit = process_event_stub;
if (handler->lost == NULL)
- handler->lost = process_event_stub;
+ handler->lost = event__process_lost;
if (handler->read == NULL)
handler->read = process_event_stub;
if (handler->throttle == NULL)
@@ -386,33 +396,51 @@ static event__swap_op event__swap_ops[] = {
struct sample_queue {
u64 timestamp;
- struct sample_event *event;
+ event_t *event;
struct list_head list;
};
+static void perf_session_free_sample_buffers(struct perf_session *session)
+{
+ struct ordered_samples *os = &session->ordered_samples;
+
+ while (!list_empty(&os->to_free)) {
+ struct sample_queue *sq;
+
+ sq = list_entry(os->to_free.next, struct sample_queue, list);
+ list_del(&sq->list);
+ free(sq);
+ }
+}
+
static void flush_sample_queue(struct perf_session *s,
struct perf_event_ops *ops)
{
- struct list_head *head = &s->ordered_samples.samples_head;
- u64 limit = s->ordered_samples.next_flush;
+ struct ordered_samples *os = &s->ordered_samples;
+ struct list_head *head = &os->samples;
struct sample_queue *tmp, *iter;
+ u64 limit = os->next_flush;
+ u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
if (!ops->ordered_samples || !limit)
return;
list_for_each_entry_safe(iter, tmp, head, list) {
if (iter->timestamp > limit)
- return;
-
- if (iter == s->ordered_samples.last_inserted)
- s->ordered_samples.last_inserted = NULL;
+ break;
- ops->sample((event_t *)iter->event, s);
+ ops->sample(iter->event, s);
- s->ordered_samples.last_flush = iter->timestamp;
+ os->last_flush = iter->timestamp;
list_del(&iter->list);
- free(iter->event);
- free(iter);
+ list_add(&iter->list, &os->sample_cache);
+ }
+
+ if (list_empty(head)) {
+ os->last_sample = NULL;
+ } else if (last_ts <= limit) {
+ os->last_sample =
+ list_entry(head->prev, struct sample_queue, list);
}
}
@@ -465,104 +493,87 @@ static int process_finished_round(event_t *event __used,
return 0;
}
-static void __queue_sample_end(struct sample_queue *new, struct list_head *head)
-{
- struct sample_queue *iter;
-
- list_for_each_entry_reverse(iter, head, list) {
- if (iter->timestamp < new->timestamp) {
- list_add(&new->list, &iter->list);
- return;
- }
- }
-
- list_add(&new->list, head);
-}
-
-static void __queue_sample_before(struct sample_queue *new,
- struct sample_queue *iter,
- struct list_head *head)
-{
- list_for_each_entry_continue_reverse(iter, head, list) {
- if (iter->timestamp < new->timestamp) {
- list_add(&new->list, &iter->list);
- return;
- }
- }
-
- list_add(&new->list, head);
-}
-
-static void __queue_sample_after(struct sample_queue *new,
- struct sample_queue *iter,
- struct list_head *head)
-{
- list_for_each_entry_continue(iter, head, list) {
- if (iter->timestamp > new->timestamp) {
- list_add_tail(&new->list, &iter->list);
- return;
- }
- }
- list_add_tail(&new->list, head);
-}
-
/* The queue is ordered by time */
static void __queue_sample_event(struct sample_queue *new,
struct perf_session *s)
{
- struct sample_queue *last_inserted = s->ordered_samples.last_inserted;
- struct list_head *head = &s->ordered_samples.samples_head;
+ struct ordered_samples *os = &s->ordered_samples;
+ struct sample_queue *sample = os->last_sample;
+ u64 timestamp = new->timestamp;
+ struct list_head *p;
+ os->last_sample = new;
- if (!last_inserted) {
- __queue_sample_end(new, head);
+ if (!sample) {
+ list_add(&new->list, &os->samples);
+ os->max_timestamp = timestamp;
return;
}
/*
- * Most of the time the current event has a timestamp
- * very close to the last event inserted, unless we just switched
- * to another event buffer. Having a sorting based on a list and
- * on the last inserted event that is close to the current one is
- * probably more efficient than an rbtree based sorting.
+ * last_sample might point to some random place in the list as it's
+ * the last queued event. We expect that the new event is close to
+ * this.
*/
- if (last_inserted->timestamp >= new->timestamp)
- __queue_sample_before(new, last_inserted, head);
- else
- __queue_sample_after(new, last_inserted, head);
+ if (sample->timestamp <= timestamp) {
+ while (sample->timestamp <= timestamp) {
+ p = sample->list.next;
+ if (p == &os->samples) {
+ list_add_tail(&new->list, &os->samples);
+ os->max_timestamp = timestamp;
+ return;
+ }
+ sample = list_entry(p, struct sample_queue, list);
+ }
+ list_add_tail(&new->list, &sample->list);
+ } else {
+ while (sample->timestamp > timestamp) {
+ p = sample->list.prev;
+ if (p == &os->samples) {
+ list_add(&new->list, &os->samples);
+ return;
+ }
+ sample = list_entry(p, struct sample_queue, list);
+ }
+ list_add(&new->list, &sample->list);
+ }
}
+#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct sample_queue))
+
static int queue_sample_event(event_t *event, struct sample_data *data,
struct perf_session *s)
{
+ struct ordered_samples *os = &s->ordered_samples;
+ struct list_head *sc = &os->sample_cache;
u64 timestamp = data->time;
struct sample_queue *new;
-
if (timestamp < s->ordered_samples.last_flush) {
printf("Warning: Timestamp below last timeslice flush\n");
return -EINVAL;
}
- new = malloc(sizeof(*new));
- if (!new)
- return -ENOMEM;
-
- new->timestamp = timestamp;
-
- new->event = malloc(event->header.size);
- if (!new->event) {
- free(new);
- return -ENOMEM;
+ if (!list_empty(sc)) {
+ new = list_entry(sc->next, struct sample_queue, list);
+ list_del(&new->list);
+ } else if (os->sample_buffer) {
+ new = os->sample_buffer + os->sample_buffer_idx;
+ if (++os->sample_buffer_idx == MAX_SAMPLE_BUFFER)
+ os->sample_buffer = NULL;
+ } else {
+ os->sample_buffer = malloc(MAX_SAMPLE_BUFFER * sizeof(*new));
+ if (!os->sample_buffer)
+ return -ENOMEM;
+ list_add(&os->sample_buffer->list, &os->to_free);
+ os->sample_buffer_idx = 2;
+ new = os->sample_buffer + 1;
}
- memcpy(new->event, event, event->header.size);
+ new->timestamp = timestamp;
+ new->event = event;
__queue_sample_event(new, s);
- s->ordered_samples.last_inserted = new;
-
- if (new->timestamp > s->ordered_samples.max_timestamp)
- s->ordered_samples.max_timestamp = new->timestamp;
return 0;
}
@@ -586,13 +597,13 @@ static int perf_session__process_sample(event_t *event, struct perf_session *s,
static int perf_session__process_event(struct perf_session *self,
event_t *event,
struct perf_event_ops *ops,
- u64 offset, u64 head)
+ u64 file_offset)
{
trace_event(event);
if (event->header.type < PERF_RECORD_HEADER_MAX) {
dump_printf("%#Lx [%#x]: PERF_RECORD_%s",
- offset + head, event->header.size,
+ file_offset, event->header.size,
event__name[event->header.type]);
hists__inc_nr_events(&self->hists, event->header.type);
}
@@ -625,7 +636,7 @@ static int perf_session__process_event(struct perf_session *self,
return ops->event_type(event, self);
case PERF_RECORD_HEADER_TRACING_DATA:
/* setup for reading amidst mmap */
- lseek(self->fd, offset + head, SEEK_SET);
+ lseek(self->fd, file_offset, SEEK_SET);
return ops->tracing_data(event, self);
case PERF_RECORD_HEADER_BUILD_ID:
return ops->build_id(event, self);
@@ -724,8 +735,7 @@ more:
}
if (size == 0 ||
- (skip = perf_session__process_event(self, &event, ops,
- 0, head)) < 0) {
+ (skip = perf_session__process_event(self, &event, ops, head)) < 0) {
dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
head, event.header.size, event.header.type);
/*
@@ -751,82 +761,93 @@ more:
done:
err = 0;
out_err:
+ perf_session_free_sample_buffers(self);
return err;
}
-int __perf_session__process_events(struct perf_session *self,
+int __perf_session__process_events(struct perf_session *session,
u64 data_offset, u64 data_size,
u64 file_size, struct perf_event_ops *ops)
{
- int err, mmap_prot, mmap_flags;
- u64 head, shift;
- u64 offset = 0;
- size_t page_size;
+ u64 head, page_offset, file_offset, file_pos, progress_next;
+ int err, mmap_prot, mmap_flags, map_idx = 0;
+ struct ui_progress *progress;
+ size_t page_size, mmap_size;
+ char *buf, *mmaps[8];
event_t *event;
uint32_t size;
- char *buf;
- struct ui_progress *progress = ui_progress__new("Processing events...",
- self->size);
- if (progress == NULL)
- return -1;
perf_event_ops__fill_defaults(ops);
page_size = sysconf(_SC_PAGESIZE);
- head = data_offset;
- shift = page_size * (head / page_size);
- offset += shift;
- head -= shift;
+ page_offset = page_size * (data_offset / page_size);
+ file_offset = page_offset;
+ head = data_offset - page_offset;
+
+ if (data_offset + data_size < file_size)
+ file_size = data_offset + data_size;
+
+ progress_next = file_size / 16;
+ progress = ui_progress__new("Processing events...", file_size);
+ if (progress == NULL)
+ return -1;
+
+ mmap_size = session->mmap_window;
+ if (mmap_size > file_size)
+ mmap_size = file_size;
+
+ memset(mmaps, 0, sizeof(mmaps));
mmap_prot = PROT_READ;
mmap_flags = MAP_SHARED;
- if (self->header.needs_swap) {
+ if (session->header.needs_swap) {
mmap_prot |= PROT_WRITE;
mmap_flags = MAP_PRIVATE;
}
remap:
- buf = mmap(NULL, page_size * self->mmap_window, mmap_prot,
- mmap_flags, self->fd, offset);
+ buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, session->fd,
+ file_offset);
if (buf == MAP_FAILED) {
pr_err("failed to mmap file\n");
err = -errno;
goto out_err;
}
+ mmaps[map_idx] = buf;
+ map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+ file_pos = file_offset + head;
more:
event = (event_t *)(buf + head);
- ui_progress__update(progress, offset);
- if (self->header.needs_swap)
+ if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
size = event->header.size;
if (size == 0)
size = 8;
- if (head + event->header.size >= page_size * self->mmap_window) {
- int munmap_ret;
-
- shift = page_size * (head / page_size);
-
- munmap_ret = munmap(buf, page_size * self->mmap_window);
- assert(munmap_ret == 0);
+ if (head + event->header.size >= mmap_size) {
+ if (mmaps[map_idx]) {
+ munmap(mmaps[map_idx], mmap_size);
+ mmaps[map_idx] = NULL;
+ }
- offset += shift;
- head -= shift;
+ page_offset = page_size * (head / page_size);
+ file_offset += page_offset;
+ head -= page_offset;
goto remap;
}
size = event->header.size;
dump_printf("\n%#Lx [%#x]: event: %d\n",
- offset + head, event->header.size, event->header.type);
+ file_pos, event->header.size, event->header.type);
if (size == 0 ||
- perf_session__process_event(self, event, ops, offset, head) < 0) {
+ perf_session__process_event(session, event, ops, file_pos) < 0) {
dump_printf("%#Lx [%#x]: skipping unknown header type: %d\n",
- offset + head, event->header.size,
+ file_offset + head, event->header.size,
event->header.type);
/*
* assume we lost track of the stream, check alignment, and
@@ -839,19 +860,41 @@ more:
}
head += size;
+ file_pos += size;
- if (offset + head >= data_offset + data_size)
- goto done;
+ if (file_pos >= progress_next) {
+ progress_next += file_size / 16;
+ ui_progress__update(progress, file_pos);
+ }
- if (offset + head < file_size)
+ if (file_pos < file_size)
goto more;
-done:
+
err = 0;
/* do the final flush for ordered samples */
- self->ordered_samples.next_flush = ULLONG_MAX;
- flush_sample_queue(self, ops);
+ session->ordered_samples.next_flush = ULLONG_MAX;
+ flush_sample_queue(session, ops);
out_err:
ui_progress__delete(progress);
+
+ if (ops->lost == event__process_lost &&
+ session->hists.stats.total_lost != 0) {
+ ui__warning("Processed %Lu events and LOST %Lu!\n\n"
+ "Check IO/CPU overload!\n\n",
+ session->hists.stats.total_period,
+ session->hists.stats.total_lost);
+ }
+
+ if (session->hists.stats.nr_unknown_events != 0) {
+ ui__warning("Found %u unknown events!\n\n"
+ "Is this an older tool processing a perf.data "
+ "file generated by a more recent tool?\n\n"
+ "If that is not the case, consider "
+ "reporting to linux-kernel@vger.kernel.org.\n\n",
+ session->hists.stats.nr_unknown_events);
+ }
+
+ perf_session_free_sample_buffers(session);
return err;
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9fa0fc2a863f..5bf6efa3788a 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -17,8 +17,12 @@ struct ordered_samples {
u64 last_flush;
u64 next_flush;
u64 max_timestamp;
- struct list_head samples_head;
- struct sample_queue *last_inserted;
+ struct list_head samples;
+ struct list_head sample_cache;
+ struct list_head to_free;
+ struct sample_queue *sample_buffer;
+ struct sample_queue *last_sample;
+ int sample_buffer_idx;
};
struct perf_session {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b39f499e575a..a348906b587d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -121,7 +121,7 @@ static void __map_groups__fixup_end(struct map_groups *self, enum map_type type)
* We still haven't the actual symbols, so guess the
* last map final address.
*/
- curr->end = ~0UL;
+ curr->end = ~0ULL;
}
static void map_groups__fixup_end(struct map_groups *self)
@@ -295,7 +295,9 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
{
struct rb_node **p = &self->rb_node;
struct rb_node *parent = NULL;
- struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s;
+ struct symbol_name_rb_node *symn, *s;
+
+ symn = container_of(sym, struct symbol_name_rb_node, sym);
while (*p != NULL) {
parent = *p;
diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c
index 056c69521a38..7b5a8926624e 100644
--- a/tools/perf/util/ui/util.c
+++ b/tools/perf/util/ui/util.c
@@ -104,10 +104,24 @@ out_destroy_form:
return rc;
}
-static const char yes[] = "Yes", no[] = "No";
+static const char yes[] = "Yes", no[] = "No",
+ warning_str[] = "Warning!", ok[] = "Ok";
bool ui__dialog_yesno(const char *msg)
{
/* newtWinChoice should really be accepting const char pointers... */
return newtWinChoice(NULL, (char *)yes, (char *)no, (char *)msg) == 1;
}
+
+void ui__warning(const char *format, ...)
+{
+ va_list args;
+
+ va_start(args, format);
+ if (use_browser > 0)
+ newtWinMessagev((char *)warning_str, (char *)ok,
+ (char *)format, args);
+ else
+ vfprintf(stderr, format, args);
+ va_end(args);
+}
OpenPOWER on IntegriCloud