diff options
Diffstat (limited to 'tools/perf/examples/bpf/augmented_raw_syscalls.c')
-rw-r--r-- | tools/perf/examples/bpf/augmented_raw_syscalls.c | 154 |
1 files changed, 121 insertions, 33 deletions
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index f9b2161e1ca4..2f822bb51717 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -15,13 +15,20 @@ */ #include <unistd.h> +#include <linux/limits.h> #include <pid_filter.h> /* bpf-output associated map */ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +/* + * string_args_len: one per syscall arg, 0 means not a string or don't copy it, + * PATH_MAX for copying everything, any other value to limit + * it a la 'strace -s strsize'. + */ struct syscall { bool enabled; + u16 string_args_len[6]; }; bpf_map(syscalls, ARRAY, int, struct syscall, 512); @@ -40,33 +47,74 @@ struct syscall_exit_args { struct augmented_filename { unsigned int size; - int reserved; - char value[256]; + int err; + char value[PATH_MAX]; }; -#define SYS_OPEN 2 -#define SYS_ACCESS 21 -#define SYS_OPENAT 257 - pid_filter(pids_filtered); +struct augmented_args_filename { + struct syscall_enter_args args; + struct augmented_filename filename; +}; + +bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1); + +static inline +unsigned int augmented_filename__read(struct augmented_filename *augmented_filename, + const void *filename_arg, unsigned int filename_len) +{ + unsigned int len = sizeof(*augmented_filename); + int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg); + + augmented_filename->size = augmented_filename->err = 0; + /* + * probe_read_str may return < 0, e.g. -EFAULT + * So we leave that in the augmented_filename->size that userspace will + */ + if (size > 0) { + len -= sizeof(augmented_filename->value) - size; + len &= sizeof(augmented_filename->value) - 1; + augmented_filename->size = size; + } else { + /* + * So that username notice the error while still being able + * to skip this augmented arg record + */ + augmented_filename->err = size; + len = offsetof(struct augmented_filename, value); + } + + return len; +} + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { - struct { - struct syscall_enter_args args; - struct augmented_filename filename; - } augmented_args; + struct augmented_args_filename *augmented_args; + /* + * We start len, the amount of data that will be in the perf ring + * buffer, if this is not filtered out by one of pid_filter__has(), + * syscall->enabled, etc, with the non-augmented raw syscall payload, + * i.e. sizeof(augmented_args->args). + * + * We'll add to this as we add augmented syscalls right after that + * initial, non-augmented raw_syscalls:sys_enter payload. + */ + unsigned int len = sizeof(augmented_args->args); struct syscall *syscall; - unsigned int len = sizeof(augmented_args); - const void *filename_arg = NULL; + int key = 0; + + augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key); + if (augmented_args == NULL) + return 1; if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); + probe_read(&augmented_args->args, sizeof(augmented_args->args), args); - syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr); + syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr); if (syscall == NULL || !syscall->enabled) return 0; /* @@ -109,30 +157,70 @@ int sys_enter(struct syscall_enter_args *args) * * after the ctx memory access to prevent their down stream merging. */ - switch (augmented_args.args.syscall_nr) { - case SYS_ACCESS: - case SYS_OPEN: filename_arg = (const void *)args->args[0]; + /* + * For now copy just the first string arg, we need to improve the protocol + * and have more than one. + * + * Using the unrolled loop is not working, only when we do it manually, + * check this out later... + + u8 arg; +#pragma clang loop unroll(full) + for (arg = 0; arg < 6; ++arg) { + if (syscall->string_args_len[arg] != 0) { + filename_len = syscall->string_args_len[arg]; + filename_arg = (const void *)args->args[arg]; __asm__ __volatile__("": : :"memory"); - break; - case SYS_OPENAT: filename_arg = (const void *)args->args[1]; - break; - } - - if (filename_arg != NULL) { - augmented_args.filename.reserved = 0; - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, - sizeof(augmented_args.filename.value), - filename_arg); - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; - len &= sizeof(augmented_args.filename.value) - 1; + break; } - } else { - len = sizeof(augmented_args.args); } + verifier log: + +; if (syscall->string_args_len[arg] != 0) { +37: (69) r3 = *(u16 *)(r0 +2) + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; if (syscall->string_args_len[arg] != 0) { +38: (55) if r3 != 0x0 goto pc+5 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +39: (b7) r1 = 1 +; if (syscall->string_args_len[arg] != 0) { +40: (bf) r2 = r0 +41: (07) r2 += 4 +42: (69) r3 = *(u16 *)(r0 +4) + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; if (syscall->string_args_len[arg] != 0) { +43: (15) if r3 == 0x0 goto pc+32 + R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm +; filename_arg = (const void *)args->args[arg]; +44: (67) r1 <<= 3 +45: (bf) r3 = r6 +46: (0f) r3 += r1 +47: (b7) r5 = 64 +48: (79) r3 = *(u64 *)(r3 +16) +dereference of modified ctx ptr R3 off=8 disallowed +processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7 + */ + +#define __loop_iter(arg) \ + if (syscall->string_args_len[arg] != 0) { \ + unsigned int filename_len = syscall->string_args_len[arg]; \ + const void *filename_arg = (const void *)args->args[arg]; \ + if (filename_len <= sizeof(augmented_args->filename.value)) \ + len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len); +#define loop_iter_first() __loop_iter(0); } +#define loop_iter(arg) else __loop_iter(arg); } +#define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); } + + loop_iter_first() + loop_iter(1) + loop_iter(2) + loop_iter(3) + loop_iter(4) + loop_iter_last(5) + /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); + return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len); } SEC("raw_syscalls:sys_exit") |