diff options
Diffstat (limited to 'tools/perf/examples')
-rw-r--r-- | tools/perf/examples/bpf/augmented_raw_syscalls.c | 131 | ||||
-rw-r--r-- | tools/perf/examples/bpf/augmented_syscalls.c | 154 | ||||
-rw-r--r-- | tools/perf/examples/bpf/etcsnoop.c | 80 |
3 files changed, 347 insertions, 18 deletions
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c new file mode 100644 index 000000000000..90a19336310b --- /dev/null +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the raw_syscalls tracepoints with the contents of the pointer arguments. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c cat /etc/passwd > /dev/null + * + * This exactly matches what is marshalled into the raw_syscall:sys_enter + * payload expected by the 'perf trace' beautifiers. + * + * For now it just uses the existing tracepoint augmentation code in 'perf + * trace', in the next csets we'll hook up these with the sys_enter/sys_exit + * code that will combine entry/exit in a strace like way. + */ + +#include <stdio.h> +#include <linux/socket.h> + +/* bpf-output associated map */ +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct syscall_enter_args { + unsigned long long common_tp_fields; + long syscall_nr; + unsigned long args[6]; +}; + +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define SYS_OPEN 2 +#define SYS_OPENAT 257 + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + struct { + struct syscall_enter_args args; + struct augmented_filename filename; + } augmented_args; + unsigned int len = sizeof(augmented_args); + const void *filename_arg = NULL; + + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + /* + * Yonghong and Edward Cree sayz: + * + * https://www.spinics.net/lists/netdev/msg531645.html + * + * >> R0=inv(id=0) R1=inv2 R6=ctx(id=0,off=0,imm=0) R7=inv64 R10=fp0,call_-1 + * >> 10: (bf) r1 = r6 + * >> 11: (07) r1 += 16 + * >> 12: (05) goto pc+2 + * >> 15: (79) r3 = *(u64 *)(r1 +0) + * >> dereference of modified ctx ptr R1 off=16 disallowed + * > Aha, we at least got a different error message this time. + * > And indeed llvm has done that optimisation, rather than the more obvious + * > 11: r3 = *(u64 *)(r1 +16) + * > because it wants to have lots of reads share a single insn. You may be able + * > to defeat that optimisation by adding compiler barriers, idk. Maybe someone + * > with llvm knowledge can figure out how to stop it (ideally, llvm would know + * > when it's generating for bpf backend and not do that). -O0? ¯\_(ツ)_/¯ + * + * The optimization mostly likes below: + * + * br1: + * ... + * r1 += 16 + * goto merge + * br2: + * ... + * r1 += 20 + * goto merge + * merge: + * *(u64 *)(r1 + 0) + * + * The compiler tries to merge common loads. There is no easy way to + * stop this compiler optimization without turning off a lot of other + * optimizations. The easiest way is to add barriers: + * + * __asm__ __volatile__("": : :"memory") + * + * after the ctx memory access to prevent their down stream merging. + */ + switch (augmented_args.args.syscall_nr) { + case SYS_OPEN: filename_arg = (const void *)args->args[0]; + __asm__ __volatile__("": : :"memory"); + break; + case SYS_OPENAT: filename_arg = (const void *)args->args[1]; + break; + } + + if (filename_arg != NULL) { + augmented_args.filename.reserved = 0; + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, + sizeof(augmented_args.filename.value), + filename_arg); + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; + len &= sizeof(augmented_args.filename.value) - 1; + } + } else { + len = sizeof(augmented_args.args); + } + + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return 0; +} + +SEC("raw_syscalls:sys_exit") +int sys_exit(struct syscall_exit_args *args) +{ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ +} + +license(GPL); diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 69a31386d8cd..2ae44813ef2d 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Augment the openat syscall with the contents of the filename pointer argument. + * Augment syscalls with the contents of the pointer arguments. * * Test it with: * @@ -10,15 +10,14 @@ * the last one should be the one for '/etc/passwd'. * * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them unmodified, - * which will be done as that feature is implemented in the next csets, for now - * it will appear in a dump done by the default tracepoint handler in 'perf trace', - * that uses bpf_output__fprintf() to just dump those contents, as done with - * the bpf-output event associated with the __bpf_output__ map declared in - * tools/perf/include/bpf/stdio.h. + * expected by the 'perf trace' beautifiers, and can be used by them, that will + * check if perf_sample->raw_data is more than what is expected for each + * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the + * contents of pointer arguments. */ #include <stdio.h> +#include <linux/socket.h> struct bpf_map SEC("maps") __augmented_syscalls__ = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, @@ -27,6 +26,44 @@ struct bpf_map SEC("maps") __augmented_syscalls__ = { .max_entries = __NR_CPUS__, }; +struct syscall_exit_args { + unsigned long long common_tp_fields; + long syscall_nr; + long ret; +}; + +struct augmented_filename { + unsigned int size; + int reserved; + char value[256]; +}; + +#define augmented_filename_syscall(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct augmented_filename filename; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + unsigned int len = sizeof(augmented_args); \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ + sizeof(augmented_args.filename.value), \ + args->filename_ptr); \ + if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ + len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ + len &= sizeof(augmented_args.filename.value) - 1; \ + } \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, len); \ + return 0; \ +} \ +int syscall_exit(syscall)(struct syscall_exit_args *args) \ +{ \ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ +} + struct syscall_enter_openat_args { unsigned long long common_tp_fields; long syscall_nr; @@ -36,20 +73,101 @@ struct syscall_enter_openat_args { long mode; }; -struct augmented_enter_openat_args { - struct syscall_enter_openat_args args; - char filename[64]; +augmented_filename_syscall(openat); + +struct syscall_enter_open_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall(open); + +struct syscall_enter_inotify_add_watch_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + char *filename_ptr; + long mask; +}; + +augmented_filename_syscall(inotify_add_watch); + +struct statbuf; + +struct syscall_enter_newstat_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + struct stat *statbuf; }; -int syscall_enter(openat)(struct syscall_enter_openat_args *args) -{ - struct augmented_enter_openat_args augmented_args; +augmented_filename_syscall(newstat); + +#ifndef _K_SS_MAXSIZE +#define _K_SS_MAXSIZE 128 +#endif - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); - probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr); - perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, - &augmented_args, sizeof(augmented_args)); - return 1; +#define augmented_sockaddr_syscall(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct sockaddr_storage addr; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + struct augmented_enter_##syscall##_args augmented_args; \ + unsigned long addrlen = sizeof(augmented_args.addr); \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ +/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ +/* if (addrlen > augmented_args.args.addrlen) */ \ +/* addrlen = augmented_args.args.addrlen; */ \ +/* */ \ + probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ + return 0; \ +} \ +int syscall_exit(syscall)(struct syscall_exit_args *args) \ +{ \ + return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */ \ } +struct sockaddr; + +struct syscall_enter_bind_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + struct sockaddr *addr_ptr; + unsigned long addrlen; +}; + +augmented_sockaddr_syscall(bind); + +struct syscall_enter_connect_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + struct sockaddr *addr_ptr; + unsigned long addrlen; +}; + +augmented_sockaddr_syscall(connect); + +struct syscall_enter_sendto_args { + unsigned long long common_tp_fields; + long syscall_nr; + long fd; + void *buff; + long len; + unsigned long flags; + struct sockaddr *addr_ptr; + long addr_len; +}; + +augmented_sockaddr_syscall(sendto); + license(GPL); diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c new file mode 100644 index 000000000000..b59e8812ee8c --- /dev/null +++ b/tools/perf/examples/bpf/etcsnoop.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Augment the filename syscalls with the contents of the filename pointer argument + * filtering only those that do not start with /etc/. + * + * Test it with: + * + * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null + * + * It'll catch some openat syscalls related to the dynamic linked and + * the last one should be the one for '/etc/passwd'. + * + * This matches what is marshalled into the raw_syscall:sys_enter payload + * expected by the 'perf trace' beautifiers, and can be used by them unmodified, + * which will be done as that feature is implemented in the next csets, for now + * it will appear in a dump done by the default tracepoint handler in 'perf trace', + * that uses bpf_output__fprintf() to just dump those contents, as done with + * the bpf-output event associated with the __bpf_output__ map declared in + * tools/perf/include/bpf/stdio.h. + */ + +#include <stdio.h> + +struct bpf_map SEC("maps") __augmented_syscalls__ = { + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(u32), + .max_entries = __NR_CPUS__, +}; + +struct augmented_filename { + int size; + int reserved; + char value[64]; +}; + +#define augmented_filename_syscall_enter(syscall) \ +struct augmented_enter_##syscall##_args { \ + struct syscall_enter_##syscall##_args args; \ + struct augmented_filename filename; \ +}; \ +int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ +{ \ + char etc[6] = "/etc/"; \ + struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ + probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ + augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ + sizeof(augmented_args.filename.value), \ + args->filename_ptr); \ + if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ + return 0; \ + perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ + &augmented_args, \ + (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ + augmented_args.filename.size)); \ + return 0; \ +} + +struct syscall_enter_openat_args { + unsigned long long common_tp_fields; + long syscall_nr; + long dfd; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall_enter(openat); + +struct syscall_enter_open_args { + unsigned long long common_tp_fields; + long syscall_nr; + char *filename_ptr; + long flags; + long mode; +}; + +augmented_filename_syscall_enter(open); + +license(GPL); |