summaryrefslogtreecommitdiffstats
path: root/tools/perf/examples/bpf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf/examples/bpf')
-rw-r--r--tools/perf/examples/bpf/5sec.c49
-rw-r--r--tools/perf/examples/bpf/augmented_raw_syscalls.c131
-rw-r--r--tools/perf/examples/bpf/augmented_syscalls.c173
-rw-r--r--tools/perf/examples/bpf/empty.c3
-rw-r--r--tools/perf/examples/bpf/etcsnoop.c80
-rw-r--r--tools/perf/examples/bpf/hello.c9
-rw-r--r--tools/perf/examples/bpf/sys_enter_openat.c33
7 files changed, 478 insertions, 0 deletions
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
new file mode 100644
index 000000000000..b9c203219691
--- /dev/null
+++ b/tools/perf/examples/bpf/5sec.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ Description:
+
+ . Disable strace like syscall tracing (--no-syscalls), or try tracing
+ just some (-e *sleep).
+
+ . Attach a filter function to a kernel function, returning when it should
+ be considered, i.e. appear on the output.
+
+ . Run it system wide, so that any sleep of >= 5 seconds and < than 6
+ seconds gets caught.
+
+ . Ask for callgraphs using DWARF info, so that userspace can be unwound
+
+ . While this is running, run something like "sleep 5s".
+
+ . If we decide to add tv_nsec as well, then it becomes:
+
+ int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
+
+ I.e. add where it comes from (rqtp->tv_nsec) and where it will be
+ accessible in the function body (nsec)
+
+ # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
+ 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
+ hrtimer_nanosleep ([kernel.kallsyms])
+ __x64_sys_nanosleep ([kernel.kallsyms])
+ do_syscall_64 ([kernel.kallsyms])
+ entry_SYSCALL_64 ([kernel.kallsyms])
+ __GI___nanosleep (/usr/lib64/libc-2.26.so)
+ rpl_nanosleep (/usr/bin/sleep)
+ xnanosleep (/usr/bin/sleep)
+ main (/usr/bin/sleep)
+ __libc_start_main (/usr/lib64/libc-2.26.so)
+ _start (/usr/bin/sleep)
+ ^C#
+
+ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
+*/
+
+#include <bpf.h>
+
+int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
+{
+ return sec == 5;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
new file mode 100644
index 000000000000..90a19336310b
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the raw_syscalls tracepoints with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * This exactly matches what is marshalled into the raw_syscall:sys_enter
+ * payload expected by the 'perf trace' beautifiers.
+ *
+ * For now it just uses the existing tracepoint augmentation code in 'perf
+ * trace', in the next csets we'll hook up these with the sys_enter/sys_exit
+ * code that will combine entry/exit in a strace like way.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+/* bpf-output associated map */
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = __NR_CPUS__,
+};
+
+struct syscall_enter_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ unsigned long args[6];
+};
+
+struct syscall_exit_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long ret;
+};
+
+struct augmented_filename {
+ unsigned int size;
+ int reserved;
+ char value[256];
+};
+
+#define SYS_OPEN 2
+#define SYS_OPENAT 257
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+ struct {
+ struct syscall_enter_args args;
+ struct augmented_filename filename;
+ } augmented_args;
+ unsigned int len = sizeof(augmented_args);
+ const void *filename_arg = NULL;
+
+ probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+ /*
+ * Yonghong and Edward Cree sayz:
+ *
+ * https://www.spinics.net/lists/netdev/msg531645.html
+ *
+ * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1
+ * >> 10: (bf) r1 = r6
+ * >> 11: (07) r1 += 16
+ * >> 12: (05) goto pc+2
+ * >> 15: (79) r3 = *(u64 *)(r1 +0)
+ * >> dereference of modified ctx ptr R1 off=16 disallowed
+ * > Aha, we at least got a different error message this time.
+ * > And indeed llvm has done that optimisation, rather than the more obvious
+ * > 11: r3 = *(u64 *)(r1 +16)
+ * > because it wants to have lots of reads share a single insn. You may be able
+ * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone
+ * > with llvm knowledge can figure out how to stop it (ideally, llvm would know
+ * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯
+ *
+ * The optimization mostly likes below:
+ *
+ * br1:
+ * ...
+ * r1 += 16
+ * goto merge
+ * br2:
+ * ...
+ * r1 += 20
+ * goto merge
+ * merge:
+ * *(u64 *)(r1 + 0)
+ *
+ * The compiler tries to merge common loads. There is no easy way to
+ * stop this compiler optimization without turning off a lot of other
+ * optimizations. The easiest way is to add barriers:
+ *
+ * __asm__ __volatile__("": : :"memory")
+ *
+ * after the ctx memory access to prevent their down stream merging.
+ */
+ switch (augmented_args.args.syscall_nr) {
+ case SYS_OPEN: filename_arg = (const void *)args->args[0];
+ __asm__ __volatile__("": : :"memory");
+ break;
+ case SYS_OPENAT: filename_arg = (const void *)args->args[1];
+ break;
+ }
+
+ if (filename_arg != NULL) {
+ augmented_args.filename.reserved = 0;
+ augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
+ sizeof(augmented_args.filename.value),
+ filename_arg);
+ if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
+ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
+ len &= sizeof(augmented_args.filename.value) - 1;
+ }
+ } else {
+ len = sizeof(augmented_args.args);
+ }
+
+ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
+ return 0;
+}
+
+SEC("raw_syscalls:sys_exit")
+int sys_exit(struct syscall_exit_args *args)
+{
+ return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
new file mode 100644
index 000000000000..2ae44813ef2d
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment syscalls with the contents of the pointer arguments.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * This matches what is marshalled into the raw_syscall:sys_enter payload
+ * expected by the 'perf trace' beautifiers, and can be used by them, that will
+ * check if perf_sample->raw_data is more than what is expected for each
+ * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the
+ * contents of pointer arguments.
+ */
+
+#include <stdio.h>
+#include <linux/socket.h>
+
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = __NR_CPUS__,
+};
+
+struct syscall_exit_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long ret;
+};
+
+struct augmented_filename {
+ unsigned int size;
+ int reserved;
+ char value[256];
+};
+
+#define augmented_filename_syscall(syscall) \
+struct augmented_enter_##syscall##_args { \
+ struct syscall_enter_##syscall##_args args; \
+ struct augmented_filename filename; \
+}; \
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
+{ \
+ struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \
+ unsigned int len = sizeof(augmented_args); \
+ probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
+ augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \
+ sizeof(augmented_args.filename.value), \
+ args->filename_ptr); \
+ if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \
+ len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \
+ len &= sizeof(augmented_args.filename.value) - 1; \
+ } \
+ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
+ &augmented_args, len); \
+ return 0; \
+} \
+int syscall_exit(syscall)(struct syscall_exit_args *args) \
+{ \
+ return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
+}
+
+struct syscall_enter_openat_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long dfd;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+augmented_filename_syscall(openat);
+
+struct syscall_enter_open_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+augmented_filename_syscall(open);
+
+struct syscall_enter_inotify_add_watch_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long fd;
+ char *filename_ptr;
+ long mask;
+};
+
+augmented_filename_syscall(inotify_add_watch);
+
+struct statbuf;
+
+struct syscall_enter_newstat_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ char *filename_ptr;
+ struct stat *statbuf;
+};
+
+augmented_filename_syscall(newstat);
+
+#ifndef _K_SS_MAXSIZE
+#define _K_SS_MAXSIZE 128
+#endif
+
+#define augmented_sockaddr_syscall(syscall) \
+struct augmented_enter_##syscall##_args { \
+ struct syscall_enter_##syscall##_args args; \
+ struct sockaddr_storage addr; \
+}; \
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
+{ \
+ struct augmented_enter_##syscall##_args augmented_args; \
+ unsigned long addrlen = sizeof(augmented_args.addr); \
+ probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
+/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \
+/* if (addrlen > augmented_args.args.addrlen) */ \
+/* addrlen = augmented_args.args.addrlen; */ \
+/* */ \
+ probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \
+ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
+ &augmented_args, \
+ sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \
+ return 0; \
+} \
+int syscall_exit(syscall)(struct syscall_exit_args *args) \
+{ \
+ return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \
+}
+
+struct sockaddr;
+
+struct syscall_enter_bind_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long fd;
+ struct sockaddr *addr_ptr;
+ unsigned long addrlen;
+};
+
+augmented_sockaddr_syscall(bind);
+
+struct syscall_enter_connect_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long fd;
+ struct sockaddr *addr_ptr;
+ unsigned long addrlen;
+};
+
+augmented_sockaddr_syscall(connect);
+
+struct syscall_enter_sendto_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long fd;
+ void *buff;
+ long len;
+ unsigned long flags;
+ struct sockaddr *addr_ptr;
+ long addr_len;
+};
+
+augmented_sockaddr_syscall(sendto);
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
new file mode 100644
index 000000000000..3776d26db9e7
--- /dev/null
+++ b/tools/perf/examples/bpf/empty.c
@@ -0,0 +1,3 @@
+#include <bpf.h>
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
new file mode 100644
index 000000000000..b59e8812ee8c
--- /dev/null
+++ b/tools/perf/examples/bpf/etcsnoop.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the filename syscalls with the contents of the filename pointer argument
+ * filtering only those that do not start with /etc/.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * This matches what is marshalled into the raw_syscall:sys_enter payload
+ * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
+ * which will be done as that feature is implemented in the next csets, for now
+ * it will appear in a dump done by the default tracepoint handler in 'perf trace',
+ * that uses bpf_output__fprintf() to just dump those contents, as done with
+ * the bpf-output event associated with the __bpf_output__ map declared in
+ * tools/perf/include/bpf/stdio.h.
+ */
+
+#include <stdio.h>
+
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = __NR_CPUS__,
+};
+
+struct augmented_filename {
+ int size;
+ int reserved;
+ char value[64];
+};
+
+#define augmented_filename_syscall_enter(syscall) \
+struct augmented_enter_##syscall##_args { \
+ struct syscall_enter_##syscall##_args args; \
+ struct augmented_filename filename; \
+}; \
+int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
+{ \
+ char etc[6] = "/etc/"; \
+ struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \
+ probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \
+ augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \
+ sizeof(augmented_args.filename.value), \
+ args->filename_ptr); \
+ if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \
+ return 0; \
+ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
+ &augmented_args, \
+ (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \
+ augmented_args.filename.size)); \
+ return 0; \
+}
+
+struct syscall_enter_openat_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long dfd;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+augmented_filename_syscall_enter(openat);
+
+struct syscall_enter_open_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+augmented_filename_syscall_enter(open);
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c
new file mode 100644
index 000000000000..cf3c2fdc7f79
--- /dev/null
+++ b/tools/perf/examples/bpf/hello.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+int syscall_enter(openat)(void *args)
+{
+ puts("Hello, world\n");
+ return 0;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c
new file mode 100644
index 000000000000..9cd124b09392
--- /dev/null
+++ b/tools/perf/examples/bpf/sys_enter_openat.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hook into 'openat' syscall entry tracepoint
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/sys_enter_openat.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linked and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * The syscall_enter_openat_args can be used to get the syscall fields
+ * and use them for filtering calls, i.e. use in expressions for
+ * the return value.
+ */
+
+#include <bpf.h>
+
+struct syscall_enter_openat_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long dfd;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+int syscall_enter(openat)(struct syscall_enter_openat_args *args)
+{
+ return 1;
+}
+
+license(GPL);
OpenPOWER on IntegriCloud