diff options
Diffstat (limited to 'tools/lib')
60 files changed, 5926 insertions, 656 deletions
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index f81e549ddfdb..4db74758c674 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,2 +1,3 @@ libbpf_version.h FEATURE-DUMP.libbpf +test_libbpf diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 7bc31c905018..ee9d5362f35b 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 425b480bda75..a05c43468bd0 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -14,21 +14,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) - INSTALL = install # Use DESTDIR for installing into a different root directory. @@ -54,7 +39,7 @@ man_dir_SQ = '$(subst ','\'',$(man_dir))' export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ -include ../../scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild @@ -66,7 +51,7 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx FEATURE_DISPLAY = libelf bpf INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -145,14 +130,26 @@ include $(srctree)/tools/build/Makefile.include BPF_IN := $(OUTPUT)libbpf-in.o LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +VERSION_SCRIPT := libbpf.map + +GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ + awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') +VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ + grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) CMD_TARGETS = $(LIB_FILE) +CXX_TEST_TARGET = $(OUTPUT)test_libbpf + +ifeq ($(feature-cxx), 1) + CMD_TARGETS += $(CXX_TEST_TARGET) +endif + TARGETS = $(CMD_TARGETS) all: fixdep all_cmd -all_cmd: $(CMD_TARGETS) +all_cmd: $(CMD_TARGETS) check $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ @@ -167,14 +164,33 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ + (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf $(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared $^ -o $@ + $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ + $^ -o $@ $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a + $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ + +check: check_abi + +check_abi: $(OUTPUT)libbpf.so + @if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \ + echo "Warning: Num of global symbols in $(BPF_IN)" \ + "($(GLOBAL_SYM_COUNT)) does NOT match with num of" \ + "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \ + "Please make sure all LIBBPF_API symbols are" \ + "versioned in $(VERSION_SCRIPT)." >&2; \ + exit 1; \ + fi + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -201,8 +217,8 @@ config-clean: $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null clean: - $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \ - $(RM) LIBBPF-CFLAGS + $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ + *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst new file mode 100644 index 000000000000..5788479384ca --- /dev/null +++ b/tools/lib/bpf/README.rst @@ -0,0 +1,166 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +libbpf API naming convention +============================ + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one-on-one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +Another example: ``bpf_program__load`` is named for corresponding +object, ``bpf_program``, that is separated from other part of the name +by double underscore. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +AF_XDP functions +------------------- + +AF_XDP functions should have an ``xsk_`` prefix, e.g. +``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists +of both low-level ring access functions and high-level configuration +functions. These can be mixed and matched. Note that these functions +are not reentrant for performance reasons. + +Please take a look at Documentation/networking/af_xdp.rst in the Linux +kernel source tree on how to use XDP sockets and for some common +mistakes in case you do not get any traffic up to user space. + +libbpf ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Stand-alone build +================= + +Under https://github.com/libbpf/libbpf there is a (semi-)automated +mirror of the mainline's version of libbpf for a stand-alone build. + +However, all changes to libbpf's code base must be upstreamed through +the mainline kernel tree. + +License +======= + +libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause. + +Links +===== + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 03f9bcc4ef50..9cd015574e83 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -22,6 +22,7 @@ */ #include <stdlib.h> +#include <string.h> #include <memory.h> #include <unistd.h> #include <asm/unistd.h> @@ -65,6 +66,17 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +{ + int fd; + + do { + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN); + + return fd; +} + int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; @@ -173,42 +185,133 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, -1); } +static void * +alloc_zero_tailing_info(const void *orecord, __u32 cnt, + __u32 actual_rec_size, __u32 expected_rec_size) +{ + __u64 info_len = actual_rec_size * cnt; + void *info, *nrecord; + int i; + + info = malloc(info_len); + if (!info) + return NULL; + + /* zero out bytes kernel does not understand */ + nrecord = info; + for (i = 0; i < cnt; i++) { + memcpy(nrecord, orecord, expected_rec_size); + memset(nrecord + expected_rec_size, 0, + actual_rec_size - expected_rec_size); + orecord += actual_rec_size; + nrecord += actual_rec_size; + } + + return info; +} + int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, char *log_buf, size_t log_buf_sz) { + void *finfo = NULL, *linfo = NULL; union bpf_attr attr; + __u32 log_level; __u32 name_len; int fd; - if (!load_attr) + if (!load_attr || !log_buf != !log_buf_sz) + return -EINVAL; + + log_level = load_attr->log_level; + if (log_level > 2 || (log_level && !log_buf)) return -EINVAL; name_len = load_attr->name ? strlen(load_attr->name) : 0; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; - attr.log_level = 0; + + attr.log_level = log_level; + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + } else { + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + } + attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; + attr.prog_btf_fd = load_attr->prog_btf_fd; + attr.func_info_rec_size = load_attr->func_info_rec_size; + attr.func_info_cnt = load_attr->func_info_cnt; + attr.func_info = ptr_to_u64(load_attr->func_info); + attr.line_info_rec_size = load_attr->line_info_rec_size; + attr.line_info_cnt = load_attr->line_info_cnt; + attr.line_info = ptr_to_u64(load_attr->line_info); memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); - if (fd >= 0 || !log_buf || !log_buf_sz) + fd = sys_bpf_prog_load(&attr, sizeof(attr)); + if (fd >= 0) return fd; + /* After bpf_prog_load, the kernel may modify certain attributes + * to give user space a hint how to deal with loading failure. + * Check to see whether we can make some changes and load again. + */ + while (errno == E2BIG && (!finfo || !linfo)) { + if (!finfo && attr.func_info_cnt && + attr.func_info_rec_size < load_attr->func_info_rec_size) { + /* try with corrected func info records */ + finfo = alloc_zero_tailing_info(load_attr->func_info, + load_attr->func_info_cnt, + load_attr->func_info_rec_size, + attr.func_info_rec_size); + if (!finfo) + goto done; + + attr.func_info = ptr_to_u64(finfo); + attr.func_info_rec_size = load_attr->func_info_rec_size; + } else if (!linfo && attr.line_info_cnt && + attr.line_info_rec_size < + load_attr->line_info_rec_size) { + linfo = alloc_zero_tailing_info(load_attr->line_info, + load_attr->line_info_cnt, + load_attr->line_info_rec_size, + attr.line_info_rec_size); + if (!linfo) + goto done; + + attr.line_info = ptr_to_u64(linfo); + attr.line_info_rec_size = load_attr->line_info_rec_size; + } else { + break; + } + + fd = sys_bpf_prog_load(&attr, sizeof(attr)); + + if (fd >= 0) + goto done; + } + + if (log_level || !log_buf) + goto done; + /* Try again with log */ attr.log_buf = ptr_to_u64(log_buf); attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); +done: + free(finfo); + free(linfo); + return fd; } int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -231,13 +334,13 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, } int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, - const char *license, __u32 kern_version, - char *log_buf, size_t log_buf_sz, int log_level) + size_t insns_cnt, __u32 prog_flags, const char *license, + __u32 kern_version, char *log_buf, size_t log_buf_sz, + int log_level) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; attr.insns = ptr_to_u64(insns); @@ -247,9 +350,9 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.log_level = log_level; log_buf[0] = 0; attr.kern_version = kern_version; - attr.prog_flags = strict_alignment ? BPF_F_STRICT_ALIGNMENT : 0; + attr.prog_flags = prog_flags; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + return sys_bpf_prog_load(&attr, sizeof(attr)); } int bpf_map_update_elem(int fd, const void *key, const void *value, @@ -257,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -270,10 +373,23 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); + attr.flags = flags; return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } @@ -282,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -294,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); @@ -305,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); @@ -317,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; @@ -328,7 +444,7 @@ int bpf_obj_get(const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); @@ -339,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -352,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_type = type; @@ -363,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -377,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.query.target_fd = target_fd; attr.query.attach_type = type; attr.query.query_flags = query_flags; @@ -398,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = prog_fd; attr.test.data_in = ptr_to_u64(data); attr.test.data_out = ptr_to_u64(data_out); @@ -415,12 +531,35 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, return ret; } +int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) +{ + union bpf_attr attr; + int ret; + + if (!test_attr->data_out && test_attr->data_size_out > 0) + return -EINVAL; + + memset(&attr, 0, sizeof(attr)); + attr.test.prog_fd = test_attr->prog_fd; + attr.test.data_in = ptr_to_u64(test_attr->data_in); + attr.test.data_out = ptr_to_u64(test_attr->data_out); + attr.test.data_size_in = test_attr->data_size_in; + attr.test.data_size_out = test_attr->data_size_out; + attr.test.repeat = test_attr->repeat; + + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + test_attr->data_size_out = attr.test.data_size_out; + test_attr->retval = attr.test.retval; + test_attr->duration = attr.test.duration; + return ret; +} + int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) { union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); @@ -435,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); @@ -449,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_id = id; return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -459,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_id = id; return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -469,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.btf_id = id; return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -480,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.info.bpf_fd = prog_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); @@ -496,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 26a51538213c..6ffdd79bea89 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -27,6 +27,10 @@ #include <stdbool.h> #include <stddef.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -74,6 +78,14 @@ struct bpf_load_program_attr { const char *license; __u32 kern_version; __u32 prog_ifindex; + __u32 prog_btf_fd; + __u32 func_info_rec_size; + const void *func_info; + __u32 func_info_cnt; + __u32 line_info_rec_size; + const void *line_info; + __u32 line_info_cnt; + __u32 log_level; }; /* Flags to direct loading requirements */ @@ -90,7 +102,7 @@ LIBBPF_API int bpf_load_program(enum bpf_prog_type type, char *log_buf, size_t log_buf_sz); LIBBPF_API int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, - size_t insns_cnt, int strict_alignment, + size_t insns_cnt, __u32 prog_flags, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz, int log_level); @@ -99,6 +111,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, + __u64 flags); LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); @@ -110,6 +124,25 @@ LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type); + +struct bpf_prog_test_run_attr { + int prog_fd; + int repeat; + const void *data_in; + __u32 data_size_in; + void *data_out; /* optional */ + __u32 data_size_out; /* in: max length of data_out + * out: length of data_out */ + __u32 retval; /* out: return code of the BPF program */ + __u32 duration; /* out: average per repetition in ns */ +}; + +LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr); + +/* + * bpf_prog_test_run does not check that data_out is large enough. Consider + * using bpf_prog_test_run_xattr instead. + */ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, void *data_out, __u32 *size_out, __u32 *retval, __u32 *duration); @@ -128,4 +161,9 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_BPF_H */ diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c new file mode 100644 index 000000000000..6978314ea7f6 --- /dev/null +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2018 Facebook */ + +#include <string.h> +#include <stdlib.h> +#include <linux/err.h> +#include <linux/bpf.h> +#include "libbpf.h" + +#ifndef min +#define min(x, y) ((x) < (y) ? (x) : (y)) +#endif + +struct bpf_prog_linfo { + void *raw_linfo; + void *raw_jited_linfo; + __u32 *nr_jited_linfo_per_func; + __u32 *jited_linfo_func_idx; + __u32 nr_linfo; + __u32 nr_jited_func; + __u32 rec_size; + __u32 jited_rec_size; +}; + +static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo, + const __u64 *ksym_func, const __u32 *ksym_len) +{ + __u32 nr_jited_func, nr_linfo; + const void *raw_jited_linfo; + const __u64 *jited_linfo; + __u64 last_jited_linfo; + /* + * Index to raw_jited_linfo: + * i: Index for searching the next ksym_func + * prev_i: Index to the last found ksym_func + */ + __u32 i, prev_i; + __u32 f; /* Index to ksym_func */ + + raw_jited_linfo = prog_linfo->raw_jited_linfo; + jited_linfo = raw_jited_linfo; + if (ksym_func[0] != *jited_linfo) + goto errout; + + prog_linfo->jited_linfo_func_idx[0] = 0; + nr_jited_func = prog_linfo->nr_jited_func; + nr_linfo = prog_linfo->nr_linfo; + + for (prev_i = 0, i = 1, f = 1; + i < nr_linfo && f < nr_jited_func; + i++) { + raw_jited_linfo += prog_linfo->jited_rec_size; + last_jited_linfo = *jited_linfo; + jited_linfo = raw_jited_linfo; + + if (ksym_func[f] == *jited_linfo) { + prog_linfo->jited_linfo_func_idx[f] = i; + + /* Sanity check */ + if (last_jited_linfo - ksym_func[f - 1] + 1 > + ksym_len[f - 1]) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[f - 1] = + i - prev_i; + prev_i = i; + + /* + * The ksym_func[f] is found in jited_linfo. + * Look for the next one. + */ + f++; + } else if (*jited_linfo <= last_jited_linfo) { + /* Ensure the addr is increasing _within_ a func */ + goto errout; + } + } + + if (f != nr_jited_func) + goto errout; + + prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] = + nr_linfo - prev_i; + + return 0; + +errout: + return -EINVAL; +} + +void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo) +{ + if (!prog_linfo) + return; + + free(prog_linfo->raw_linfo); + free(prog_linfo->raw_jited_linfo); + free(prog_linfo->nr_jited_linfo_per_func); + free(prog_linfo->jited_linfo_func_idx); + free(prog_linfo); +} + +struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info) +{ + struct bpf_prog_linfo *prog_linfo; + __u32 nr_linfo, nr_jited_func; + + nr_linfo = info->nr_line_info; + + if (!nr_linfo) + return NULL; + + /* + * The min size that bpf_prog_linfo has to access for + * searching purpose. + */ + if (info->line_info_rec_size < + offsetof(struct bpf_line_info, file_name_off)) + return NULL; + + prog_linfo = calloc(1, sizeof(*prog_linfo)); + if (!prog_linfo) + return NULL; + + /* Copy xlated line_info */ + prog_linfo->nr_linfo = nr_linfo; + prog_linfo->rec_size = info->line_info_rec_size; + prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); + if (!prog_linfo->raw_linfo) + goto err_free; + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, + nr_linfo * prog_linfo->rec_size); + + nr_jited_func = info->nr_jited_ksyms; + if (!nr_jited_func || + !info->jited_line_info || + info->nr_jited_line_info != nr_linfo || + info->jited_line_info_rec_size < sizeof(__u64) || + info->nr_jited_func_lens != nr_jited_func || + !info->jited_ksyms || + !info->jited_func_lens) + /* Not enough info to provide jited_line_info */ + return prog_linfo; + + /* Copy jited_line_info */ + prog_linfo->nr_jited_func = nr_jited_func; + prog_linfo->jited_rec_size = info->jited_line_info_rec_size; + prog_linfo->raw_jited_linfo = malloc(nr_linfo * + prog_linfo->jited_rec_size); + if (!prog_linfo->raw_jited_linfo) + goto err_free; + memcpy(prog_linfo->raw_jited_linfo, + (void *)(long)info->jited_line_info, + nr_linfo * prog_linfo->jited_rec_size); + + /* Number of jited_line_info per jited func */ + prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->nr_jited_linfo_per_func) + goto err_free; + + /* + * For each jited func, + * the start idx to the "linfo" and "jited_linfo" array, + */ + prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func * + sizeof(__u32)); + if (!prog_linfo->jited_linfo_func_idx) + goto err_free; + + if (dissect_jited_func(prog_linfo, + (__u64 *)(long)info->jited_ksyms, + (__u32 *)(long)info->jited_func_lens)) + goto err_free; + + return prog_linfo; + +err_free: + bpf_prog_linfo__free(prog_linfo); + return NULL; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip) +{ + __u32 jited_rec_size, rec_size, nr_linfo, start, i; + const void *raw_jited_linfo, *raw_linfo; + const __u64 *jited_linfo; + + if (func_idx >= prog_linfo->nr_jited_func) + return NULL; + + nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx]; + if (nr_skip >= nr_linfo) + return NULL; + + start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip; + jited_rec_size = prog_linfo->jited_rec_size; + raw_jited_linfo = prog_linfo->raw_jited_linfo + + (start * jited_rec_size); + jited_linfo = raw_jited_linfo; + if (addr < *jited_linfo) + return NULL; + + nr_linfo -= nr_skip; + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (start * rec_size); + for (i = 0; i < nr_linfo; i++) { + if (addr < *jited_linfo) + break; + + raw_linfo += rec_size; + raw_jited_linfo += jited_rec_size; + jited_linfo = raw_jited_linfo; + } + + return raw_linfo - rec_size; +} + +const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip) +{ + const struct bpf_line_info *linfo; + __u32 rec_size, nr_linfo, i; + const void *raw_linfo; + + nr_linfo = prog_linfo->nr_linfo; + if (nr_skip >= nr_linfo) + return NULL; + + rec_size = prog_linfo->rec_size; + raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size); + linfo = raw_linfo; + if (insn_off < linfo->insn_off) + return NULL; + + nr_linfo -= nr_skip; + for (i = 0; i < nr_linfo; i++) { + if (insn_off < linfo->insn_off) + break; + + raw_linfo += rec_size; + linfo = raw_linfo; + } + + return raw_linfo - rec_size; +} diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 449591aa9900..1b8d8cdd3575 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -9,12 +10,14 @@ #include <linux/btf.h> #include "btf.h" #include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" -#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) -#define BTF_MAX_NR_TYPES 65535 +#define BTF_MAX_NR_TYPES 0x7fffffff +#define BTF_MAX_STR_OFFSET 0x7fffffff #define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ ((k) == BTF_KIND_VOLATILE) || \ @@ -37,6 +40,52 @@ struct btf { int fd; }; +struct btf_ext_info { + /* + * info points to the individual info section (e.g. func_info and + * line_info) from the .BTF.ext. It does not include the __u32 rec_size. + */ + void *info; + __u32 rec_size; + __u32 len; +}; + +struct btf_ext { + union { + struct btf_ext_header *hdr; + void *data; + }; + struct btf_ext_info func_info; + struct btf_ext_info line_info; + __u32 data_size; +}; + +struct btf_ext_info_sec { + __u32 sec_name_off; + __u32 num_info; + /* Followed by num_info * record_size number of bytes */ + __u8 data[0]; +}; + +/* The minimum bpf_func_info checked by the loader */ +struct bpf_func_info_min { + __u32 insn_off; + __u32 type_id; +}; + +/* The minimum bpf_line_info checked by the loader */ +struct bpf_line_info_min { + __u32 insn_off; + __u32 file_name_off; + __u32 line_off; + __u32 line_col; +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_add_type(struct btf *btf, struct btf_type *t) { if (btf->types_size - btf->nr_types < 2) { @@ -65,54 +114,54 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) return 0; } -static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_hdr(struct btf *btf) { const struct btf_header *hdr = btf->hdr; __u32 meta_left; if (btf->data_size < sizeof(struct btf_header)) { - elog("BTF header not found\n"); + pr_debug("BTF header not found\n"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF version:%u\n", hdr->version); + pr_debug("Unsupported BTF version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF flags:%x\n", hdr->flags); return -ENOTSUP; } meta_left = btf->data_size - sizeof(*hdr); if (!meta_left) { - elog("BTF has no data\n"); + pr_debug("BTF has no data\n"); return -EINVAL; } if (meta_left < hdr->type_off) { - elog("Invalid BTF type section offset:%u\n", hdr->type_off); + pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); return -EINVAL; } if (meta_left < hdr->str_off) { - elog("Invalid BTF string section offset:%u\n", hdr->str_off); + pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); return -EINVAL; } if (hdr->type_off >= hdr->str_off) { - elog("BTF type section offset >= string section offset. No type?\n"); + pr_debug("BTF type section offset >= string section offset. No type?\n"); return -EINVAL; } if (hdr->type_off & 0x02) { - elog("BTF type section is not aligned to 4 bytes\n"); + pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } @@ -121,15 +170,15 @@ static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_str_sec(struct btf *btf) { const struct btf_header *hdr = btf->hdr; const char *start = btf->nohdr_data + hdr->str_off; const char *end = start + btf->hdr->str_len; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || start[0] || end[-1]) { - elog("Invalid BTF string section\n"); + pr_debug("Invalid BTF string section\n"); return -EINVAL; } @@ -138,7 +187,38 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_type_size(struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); + return -EINVAL; + } +} + +static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; void *nohdr_data = btf->nohdr_data; @@ -147,37 +227,13 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) while (next_type < end_type) { struct btf_type *t = next_type; - __u16 vlen = BTF_INFO_VLEN(t->info); + int type_size; int err; - next_type += sizeof(*t); - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - next_type += sizeof(int); - break; - case BTF_KIND_ARRAY: - next_type += sizeof(struct btf_array); - break; - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - next_type += vlen * sizeof(struct btf_member); - break; - case BTF_KIND_ENUM: - next_type += vlen * sizeof(struct btf_enum); - break; - case BTF_KIND_TYPEDEF: - case BTF_KIND_PTR: - case BTF_KIND_FWD: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - break; - default: - elog("Unsupported BTF_KIND:%u\n", - BTF_INFO_KIND(t->info)); - return -EINVAL; - } - + type_size = btf_type_size(t); + if (type_size < 0) + return type_size; + next_type += type_size; err = btf_add_type(btf, t); if (err) return err; @@ -186,6 +242,11 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } +__u32 btf__get_nr_types(const struct btf *btf) +{ + return btf->nr_types; +} + const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id > btf->nr_types) @@ -204,21 +265,6 @@ static bool btf_type_is_void_or_null(const struct btf_type *t) return !t || btf_type_is_void(t); } -static __s64 btf_type_size(const struct btf_type *t) -{ - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - return t->size; - case BTF_KIND_PTR: - return sizeof(void *); - default: - return -EINVAL; - } -} - #define MAX_RESOLVE_DEPTH 32 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) @@ -232,11 +278,16 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { - size = btf_type_size(t); - if (size >= 0) - break; - switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + size = t->size; + goto done; + case BTF_KIND_PTR: + size = sizeof(void *); + goto done; case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: @@ -260,6 +311,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) if (size < 0) return -EINVAL; +done: if (nelems && size > UINT32_MAX / nelems) return -E2BIG; @@ -317,10 +369,8 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf *btf__new(__u8 *data, __u32 size) { - __u32 log_buf_size = 0; - char *log_buf = NULL; struct btf *btf; int err; @@ -330,16 +380,6 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) btf->fd = -1; - if (err_log) { - log_buf = malloc(BPF_LOG_BUF_SIZE); - if (!log_buf) { - err = -ENOMEM; - goto done; - } - *log_buf = 0; - log_buf_size = BPF_LOG_BUF_SIZE; - } - btf->data = malloc(size); if (!btf->data) { err = -ENOMEM; @@ -349,30 +389,17 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) memcpy(btf->data, data, size); btf->data_size = size; - btf->fd = bpf_load_btf(btf->data, btf->data_size, - log_buf, log_buf_size, false); - - if (btf->fd == -1) { - err = -errno; - elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (log_buf && *log_buf) - elog("%s\n", log_buf); - goto done; - } - - err = btf_parse_hdr(btf, err_log); + err = btf_parse_hdr(btf); if (err) goto done; - err = btf_parse_str_sec(btf, err_log); + err = btf_parse_str_sec(btf); if (err) goto done; - err = btf_parse_type_sec(btf, err_log); + err = btf_parse_type_sec(btf); done: - free(log_buf); - if (err) { btf__free(btf); return ERR_PTR(err); @@ -381,11 +408,47 @@ done: return btf; } +int btf__load(struct btf *btf) +{ + __u32 log_buf_size = BPF_LOG_BUF_SIZE; + char *log_buf = NULL; + int err = 0; + + if (btf->fd >= 0) + return -EEXIST; + + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + if (btf->fd < 0) { + err = -errno; + pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (*log_buf) + pr_warning("%s\n", log_buf); + goto done; + } + +done: + free(log_buf); + return err; +} + int btf__fd(const struct btf *btf) { return btf->fd; } +const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +{ + *size = btf->data_size; + return btf->data; +} + const char *btf__name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) @@ -393,3 +456,2195 @@ const char *btf__name_by_offset(const struct btf *btf, __u32 offset) else return NULL; } + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct bpf_btf_info btf_info = { 0 }; + __u32 len = sizeof(btf_info); + __u32 last_size; + int btf_fd; + void *ptr; + int err; + + err = 0; + *btf = NULL; + btf_fd = bpf_btf_get_fd_by_id(id); + if (btf_fd < 0) + return 0; + + /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so + * let's start with a sane default - 4KiB here - and resize it only if + * bpf_obj_get_info_by_fd() needs a bigger buffer. + */ + btf_info.btf_size = 4096; + last_size = btf_info.btf_size; + ptr = malloc(last_size); + if (!ptr) { + err = -ENOMEM; + goto exit_free; + } + + memset(ptr, 0, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + + if (!err && btf_info.btf_size > last_size) { + void *temp_ptr; + + last_size = btf_info.btf_size; + temp_ptr = realloc(ptr, last_size); + if (!temp_ptr) { + err = -ENOMEM; + goto exit_free; + } + ptr = temp_ptr; + memset(ptr, 0, last_size); + btf_info.btf = ptr_to_u64(ptr); + err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); + } + + if (err || btf_info.btf_size > last_size) { + err = errno; + goto exit_free; + } + + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); + if (IS_ERR(*btf)) { + err = PTR_ERR(*btf); + *btf = NULL; + } + +exit_free: + close(btf_fd); + free(ptr); + + return err; +} + +int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id) +{ + const struct btf_type *container_type; + const struct btf_member *key, *value; + const size_t max_name = 256; + char container_name[max_name]; + __s64 key_size, value_size; + __s32 container_id; + + if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == + max_name) { + pr_warning("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); + return -EINVAL; + } + + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map_name, container_name); + return container_id; + } + + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warning("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); + return -EINVAL; + } + + if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(container_type->info) < 2) { + pr_warning("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); + return -EINVAL; + } + + key = (struct btf_member *)(container_type + 1); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warning("map:%s invalid BTF key_type_size\n", map_name); + return key_size; + } + + if (expected_key_size != key_size) { + pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); + return -EINVAL; + } + + value_size = btf__resolve_size(btf, value->type); + if (value_size < 0) { + pr_warning("map:%s invalid BTF value_type_size\n", map_name); + return value_size; + } + + if (expected_value_size != value_size) { + pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); + return -EINVAL; + } + + *key_type_id = key->type; + *value_type_id = value->type; + + return 0; +} + +struct btf_ext_sec_setup_param { + __u32 off; + __u32 len; + __u32 min_rec_size; + struct btf_ext_info *ext_info; + const char *desc; +}; + +static int btf_ext_setup_info(struct btf_ext *btf_ext, + struct btf_ext_sec_setup_param *ext_sec) +{ + const struct btf_ext_info_sec *sinfo; + struct btf_ext_info *ext_info; + __u32 info_left, record_size; + /* The start of the info sec (including the __u32 record_size). */ + void *info; + + if (ext_sec->off & 0x03) { + pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", + ext_sec->desc); + return -EINVAL; + } + + info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; + info_left = ext_sec->len; + + if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { + pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); + return -EINVAL; + } + + /* At least a record size */ + if (info_left < sizeof(__u32)) { + pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); + return -EINVAL; + } + + /* The record size needs to meet the minimum standard */ + record_size = *(__u32 *)info; + if (record_size < ext_sec->min_rec_size || + record_size & 0x03) { + pr_debug("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); + return -EINVAL; + } + + sinfo = info + sizeof(__u32); + info_left -= sizeof(__u32); + + /* If no records, return failure now so .BTF.ext won't be used. */ + if (!info_left) { + pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + return -EINVAL; + } + + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u64 total_record_size; + __u32 num_records; + + if (info_left < sec_hdrlen) { + pr_debug("%s section header is not found in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + num_records = sinfo->num_info; + if (num_records == 0) { + pr_debug("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + total_record_size = sec_hdrlen + + (__u64)num_records * record_size; + if (info_left < total_record_size) { + pr_debug("%s section has incorrect num_records in .BTF.ext\n", + ext_sec->desc); + return -EINVAL; + } + + info_left -= total_record_size; + sinfo = (void *)sinfo + total_record_size; + } + + ext_info = ext_sec->ext_info; + ext_info->len = ext_sec->len - sizeof(__u32); + ext_info->rec_size = record_size; + ext_info->info = info + sizeof(__u32); + + return 0; +} + +static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +{ + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->func_info_off, + .len = btf_ext->hdr->func_info_len, + .min_rec_size = sizeof(struct bpf_func_info_min), + .ext_info = &btf_ext->func_info, + .desc = "func_info" + }; + + return btf_ext_setup_info(btf_ext, ¶m); +} + +static int btf_ext_setup_line_info(struct btf_ext *btf_ext) +{ + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->line_info_off, + .len = btf_ext->hdr->line_info_len, + .min_rec_size = sizeof(struct bpf_line_info_min), + .ext_info = &btf_ext->line_info, + .desc = "line_info", + }; + + return btf_ext_setup_info(btf_ext, ¶m); +} + +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +{ + const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + + if (data_size < offsetof(struct btf_ext_header, func_info_off) || + data_size < hdr->hdr_len) { + pr_debug("BTF.ext header not found"); + return -EINVAL; + } + + if (hdr->magic != BTF_MAGIC) { + pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); + return -EINVAL; + } + + if (hdr->version != BTF_VERSION) { + pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); + return -ENOTSUP; + } + + if (hdr->flags) { + pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); + return -ENOTSUP; + } + + if (data_size == hdr->hdr_len) { + pr_debug("BTF.ext has no data\n"); + return -EINVAL; + } + + return 0; +} + +void btf_ext__free(struct btf_ext *btf_ext) +{ + if (!btf_ext) + return; + free(btf_ext->data); + free(btf_ext); +} + +struct btf_ext *btf_ext__new(__u8 *data, __u32 size) +{ + struct btf_ext *btf_ext; + int err; + + err = btf_ext_parse_hdr(data, size); + if (err) + return ERR_PTR(err); + + btf_ext = calloc(1, sizeof(struct btf_ext)); + if (!btf_ext) + return ERR_PTR(-ENOMEM); + + btf_ext->data_size = size; + btf_ext->data = malloc(size); + if (!btf_ext->data) { + err = -ENOMEM; + goto done; + } + memcpy(btf_ext->data, data, size); + + err = btf_ext_setup_func_info(btf_ext); + if (err) + goto done; + + err = btf_ext_setup_line_info(btf_ext); + if (err) + goto done; + +done: + if (err) { + btf_ext__free(btf_ext); + return ERR_PTR(err); + } + + return btf_ext; +} + +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +{ + *size = btf_ext->data_size; + return btf_ext->data; +} + +static int btf_ext_reloc_info(const struct btf *btf, + const struct btf_ext_info *ext_info, + const char *sec_name, __u32 insns_cnt, + void **info, __u32 *cnt) +{ + __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, record_size, existing_len, records_len; + struct btf_ext_info_sec *sinfo; + const char *info_sec_name; + __u64 remain_len; + void *data; + + record_size = ext_info->rec_size; + sinfo = ext_info->info; + remain_len = ext_info->len; + while (remain_len > 0) { + records_len = sinfo->num_info * record_size; + info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off); + if (strcmp(info_sec_name, sec_name)) { + remain_len -= sec_hdrlen + records_len; + sinfo = (void *)sinfo + sec_hdrlen + records_len; + continue; + } + + existing_len = (*cnt) * record_size; + data = realloc(*info, existing_len + records_len); + if (!data) + return -ENOMEM; + + memcpy(data + existing_len, sinfo->data, records_len); + /* adjust insn_off only, the rest data will be passed + * to the kernel. + */ + for (i = 0; i < sinfo->num_info; i++) { + __u32 *insn_off; + + insn_off = data + existing_len + (i * record_size); + *insn_off = *insn_off / sizeof(struct bpf_insn) + + insns_cnt; + } + *info = data; + *cnt += sinfo->num_info; + return 0; + } + + return -ENOENT; +} + +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name, + insns_cnt, func_info, cnt); +} + +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt) +{ + return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name, + insns_cnt, line_info, cnt); +} + +__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->func_info.rec_size; +} + +__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) +{ + return btf_ext->line_info.rec_size; +} + +struct btf_dedup; + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); +static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_strings(struct btf_dedup *d); +static int btf_dedup_prim_types(struct btf_dedup *d); +static int btf_dedup_struct_types(struct btf_dedup *d); +static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_compact_types(struct btf_dedup *d); +static int btf_dedup_remap_types(struct btf_dedup *d); + +/* + * Deduplicate BTF types and strings. + * + * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF + * section with all BTF type descriptors and string data. It overwrites that + * memory in-place with deduplicated types and strings without any loss of + * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section + * is provided, all the strings referenced from .BTF.ext section are honored + * and updated to point to the right offsets after deduplication. + * + * If function returns with error, type/string data might be garbled and should + * be discarded. + * + * More verbose and detailed description of both problem btf_dedup is solving, + * as well as solution could be found at: + * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html + * + * Problem description and justification + * ===================================== + * + * BTF type information is typically emitted either as a result of conversion + * from DWARF to BTF or directly by compiler. In both cases, each compilation + * unit contains information about a subset of all the types that are used + * in an application. These subsets are frequently overlapping and contain a lot + * of duplicated information when later concatenated together into a single + * binary. This algorithm ensures that each unique type is represented by single + * BTF type descriptor, greatly reducing resulting size of BTF data. + * + * Compilation unit isolation and subsequent duplication of data is not the only + * problem. The same type hierarchy (e.g., struct and all the type that struct + * references) in different compilation units can be represented in BTF to + * various degrees of completeness (or, rather, incompleteness) due to + * struct/union forward declarations. + * + * Let's take a look at an example, that we'll use to better understand the + * problem (and solution). Suppose we have two compilation units, each using + * same `struct S`, but each of them having incomplete type information about + * struct's fields: + * + * // CU #1: + * struct S; + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * // CU #2: + * struct S; + * struct A; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * In case of CU #1, BTF data will know only that `struct B` exist (but no + * more), but will know the complete type information about `struct A`. While + * for CU #2, it will know full type information about `struct B`, but will + * only know about forward declaration of `struct A` (in BTF terms, it will + * have `BTF_KIND_FWD` type descriptor with name `B`). + * + * This compilation unit isolation means that it's possible that there is no + * single CU with complete type information describing structs `S`, `A`, and + * `B`. Also, we might get tons of duplicated and redundant type information. + * + * Additional complication we need to keep in mind comes from the fact that + * types, in general, can form graphs containing cycles, not just DAGs. + * + * While algorithm does deduplication, it also merges and resolves type + * information (unless disabled throught `struct btf_opts`), whenever possible. + * E.g., in the example above with two compilation units having partial type + * information for structs `A` and `B`, the output of algorithm will emit + * a single copy of each BTF type that describes structs `A`, `B`, and `S` + * (as well as type information for `int` and pointers), as if they were defined + * in a single compilation unit as: + * + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * Algorithm summary + * ================= + * + * Algorithm completes its work in 6 separate passes: + * + * 1. Strings deduplication. + * 2. Primitive types deduplication (int, enum, fwd). + * 3. Struct/union types deduplication. + * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * protos, and const/volatile/restrict modifiers). + * 5. Types compaction. + * 6. Types remapping. + * + * Algorithm determines canonical type descriptor, which is a single + * representative type for each truly unique type. This canonical type is the + * one that will go into final deduplicated BTF type information. For + * struct/unions, it is also the type that algorithm will merge additional type + * information into (while resolving FWDs), as it discovers it from data in + * other CUs. Each input BTF type eventually gets either mapped to itself, if + * that type is canonical, or to some other type, if that type is equivalent + * and was chosen as canonical representative. This mapping is stored in + * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that + * FWD type got resolved to. + * + * To facilitate fast discovery of canonical types, we also maintain canonical + * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash + * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types + * that match that signature. With sufficiently good choice of type signature + * hashing function, we can limit number of canonical types for each unique type + * signature to a very small number, allowing to find canonical type for any + * duplicated type very quickly. + * + * Struct/union deduplication is the most critical part and algorithm for + * deduplicating structs/unions is described in greater details in comments for + * `btf_dedup_is_equiv` function. + */ +int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + int err; + + if (IS_ERR(d)) { + pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + return -EINVAL; + } + + err = btf_dedup_strings(d); + if (err < 0) { + pr_debug("btf_dedup_strings failed:%d\n", err); + goto done; + } + err = btf_dedup_prim_types(d); + if (err < 0) { + pr_debug("btf_dedup_prim_types failed:%d\n", err); + goto done; + } + err = btf_dedup_struct_types(d); + if (err < 0) { + pr_debug("btf_dedup_struct_types failed:%d\n", err); + goto done; + } + err = btf_dedup_ref_types(d); + if (err < 0) { + pr_debug("btf_dedup_ref_types failed:%d\n", err); + goto done; + } + err = btf_dedup_compact_types(d); + if (err < 0) { + pr_debug("btf_dedup_compact_types failed:%d\n", err); + goto done; + } + err = btf_dedup_remap_types(d); + if (err < 0) { + pr_debug("btf_dedup_remap_types failed:%d\n", err); + goto done; + } + +done: + btf_dedup_free(d); + return err; +} + +#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14) +#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31 +#define BTF_UNPROCESSED_ID ((__u32)-1) +#define BTF_IN_PROGRESS_ID ((__u32)-2) + +struct btf_dedup_node { + struct btf_dedup_node *next; + __u32 type_id; +}; + +struct btf_dedup { + /* .BTF section to be deduped in-place */ + struct btf *btf; + /* + * Optional .BTF.ext section. When provided, any strings referenced + * from it will be taken into account when deduping strings + */ + struct btf_ext *btf_ext; + /* + * This is a map from any type's signature hash to a list of possible + * canonical representative type candidates. Hash collisions are + * ignored, so even types of various kinds can share same list of + * candidates, which is fine because we rely on subsequent + * btf_xxx_equal() checks to authoritatively verify type equality. + */ + struct btf_dedup_node **dedup_table; + /* Canonical types map */ + __u32 *map; + /* Hypothetical mapping, used during type graph equivalence checks */ + __u32 *hypot_map; + __u32 *hypot_list; + size_t hypot_cnt; + size_t hypot_cap; + /* Various option modifying behavior of algorithm */ + struct btf_dedup_opts opts; +}; + +struct btf_str_ptr { + const char *str; + __u32 new_off; + bool used; +}; + +struct btf_str_ptrs { + struct btf_str_ptr *ptrs; + const char *data; + __u32 cnt; + __u32 cap; +}; + +static inline __u32 hash_combine(__u32 h, __u32 value) +{ +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e370001UL + return h * 37 + value * GOLDEN_RATIO_PRIME; +#undef GOLDEN_RATIO_PRIME +} + +#define for_each_dedup_cand(d, hash, node) \ + for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \ + node; \ + node = node->next) + +static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id) +{ + struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node)); + int bucket = hash & (d->opts.dedup_table_size - 1); + + if (!node) + return -ENOMEM; + node->type_id = type_id; + node->next = d->dedup_table[bucket]; + d->dedup_table[bucket] = node; + return 0; +} + +static int btf_dedup_hypot_map_add(struct btf_dedup *d, + __u32 from_id, __u32 to_id) +{ + if (d->hypot_cnt == d->hypot_cap) { + __u32 *new_list; + + d->hypot_cap += max(16, d->hypot_cap / 2); + new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + if (!new_list) + return -ENOMEM; + d->hypot_list = new_list; + } + d->hypot_list[d->hypot_cnt++] = from_id; + d->hypot_map[from_id] = to_id; + return 0; +} + +static void btf_dedup_clear_hypot_map(struct btf_dedup *d) +{ + int i; + + for (i = 0; i < d->hypot_cnt; i++) + d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; + d->hypot_cnt = 0; +} + +static void btf_dedup_table_free(struct btf_dedup *d) +{ + struct btf_dedup_node *head, *tmp; + int i; + + if (!d->dedup_table) + return; + + for (i = 0; i < d->opts.dedup_table_size; i++) { + while (d->dedup_table[i]) { + tmp = d->dedup_table[i]; + d->dedup_table[i] = tmp->next; + free(tmp); + } + + head = d->dedup_table[i]; + while (head) { + tmp = head; + head = head->next; + free(tmp); + } + } + + free(d->dedup_table); + d->dedup_table = NULL; +} + +static void btf_dedup_free(struct btf_dedup *d) +{ + btf_dedup_table_free(d); + + free(d->map); + d->map = NULL; + + free(d->hypot_map); + d->hypot_map = NULL; + + free(d->hypot_list); + d->hypot_list = NULL; + + free(d); +} + +/* Find closest power of two >= to size, capped at 2^max_size_log */ +static __u32 roundup_pow2_max(__u32 size, int max_size_log) +{ + int i; + + for (i = 0; i < max_size_log && (1U << i) < size; i++) + ; + return 1U << i; +} + + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + int i, err = 0; + __u32 sz; + + if (!d) + return ERR_PTR(-ENOMEM); + + d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; + sz = opts && opts->dedup_table_size ? opts->dedup_table_size + : BTF_DEDUP_TABLE_DEFAULT_SIZE; + sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG); + d->opts.dedup_table_size = sz; + + d->btf = btf; + d->btf_ext = btf_ext; + + d->dedup_table = calloc(d->opts.dedup_table_size, + sizeof(struct btf_dedup_node *)); + if (!d->dedup_table) { + err = -ENOMEM; + goto done; + } + + d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->map) { + err = -ENOMEM; + goto done; + } + /* special BTF "void" type is made canonical immediately */ + d->map[0] = 0; + for (i = 1; i <= btf->nr_types; i++) + d->map[i] = BTF_UNPROCESSED_ID; + + d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->hypot_map) { + err = -ENOMEM; + goto done; + } + for (i = 0; i <= btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + +done: + if (err) { + btf_dedup_free(d); + return ERR_PTR(err); + } + + return d; +} + +typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); + +/* + * Iterate over all possible places in .BTF and .BTF.ext that can reference + * string and pass pointer to it to a provided callback `fn`. + */ +static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) +{ + void *line_data_cur, *line_data_end; + int i, j, r, rec_size; + struct btf_type *t; + + for (i = 1; i <= d->btf->nr_types; i++) { + t = d->btf->types[i]; + r = fn(&t->name_off, ctx); + if (r) + return r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *m = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_ENUM: { + struct btf_enum *m = (struct btf_enum *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *m = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + default: + break; + } + } + + if (!d->btf_ext) + return 0; + + line_data_cur = d->btf_ext->line_info.info; + line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len; + rec_size = d->btf_ext->line_info.rec_size; + + while (line_data_cur < line_data_end) { + struct btf_ext_info_sec *sec = line_data_cur; + struct bpf_line_info_min *line_info; + __u32 num_info = sec->num_info; + + r = fn(&sec->sec_name_off, ctx); + if (r) + return r; + + line_data_cur += sizeof(struct btf_ext_info_sec); + for (i = 0; i < num_info; i++) { + line_info = line_data_cur; + r = fn(&line_info->file_name_off, ctx); + if (r) + return r; + r = fn(&line_info->line_off, ctx); + if (r) + return r; + line_data_cur += rec_size; + } + } + + return 0; +} + +static int str_sort_by_content(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + return strcmp(p1->str, p2->str); +} + +static int str_sort_by_offset(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + if (p1->str != p2->str) + return p1->str < p2->str ? -1 : 1; + return 0; +} + +static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) +{ + const struct btf_str_ptr *p = pelem; + + if (str_ptr != p->str) + return (const char *)str_ptr < p->str ? -1 : 1; + return 0; +} + +static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + s->used = true; + return 0; +} + +static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + *str_off_ptr = s->new_off; + return 0; +} + +/* + * Dedup string and filter out those that are not referenced from either .BTF + * or .BTF.ext (if provided) sections. + * + * This is done by building index of all strings in BTF's string section, + * then iterating over all entities that can reference strings (e.g., type + * names, struct field names, .BTF.ext line info, etc) and marking corresponding + * strings as used. After that all used strings are deduped and compacted into + * sequential blob of memory and new offsets are calculated. Then all the string + * references are iterated again and rewritten using new offsets. + */ +static int btf_dedup_strings(struct btf_dedup *d) +{ + const struct btf_header *hdr = d->btf->hdr; + char *start = (char *)d->btf->nohdr_data + hdr->str_off; + char *end = start + d->btf->hdr->str_len; + char *p = start, *tmp_strs = NULL; + struct btf_str_ptrs strs = { + .cnt = 0, + .cap = 0, + .ptrs = NULL, + .data = start, + }; + int i, j, err = 0, grp_idx; + bool grp_used; + + /* build index of all strings */ + while (p < end) { + if (strs.cnt + 1 > strs.cap) { + struct btf_str_ptr *new_ptrs; + + strs.cap += max(strs.cnt / 2, 16); + new_ptrs = realloc(strs.ptrs, + sizeof(strs.ptrs[0]) * strs.cap); + if (!new_ptrs) { + err = -ENOMEM; + goto done; + } + strs.ptrs = new_ptrs; + } + + strs.ptrs[strs.cnt].str = p; + strs.ptrs[strs.cnt].used = false; + + p += strlen(p) + 1; + strs.cnt++; + } + + /* temporary storage for deduplicated strings */ + tmp_strs = malloc(d->btf->hdr->str_len); + if (!tmp_strs) { + err = -ENOMEM; + goto done; + } + + /* mark all used strings */ + strs.ptrs[0].used = true; + err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); + if (err) + goto done; + + /* sort strings by context, so that we can identify duplicates */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); + + /* + * iterate groups of equal strings and if any instance in a group was + * referenced, emit single instance and remember new offset + */ + p = tmp_strs; + grp_idx = 0; + grp_used = strs.ptrs[0].used; + /* iterate past end to avoid code duplication after loop */ + for (i = 1; i <= strs.cnt; i++) { + /* + * when i == strs.cnt, we want to skip string comparison and go + * straight to handling last group of strings (otherwise we'd + * need to handle last group after the loop w/ duplicated code) + */ + if (i < strs.cnt && + !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { + grp_used = grp_used || strs.ptrs[i].used; + continue; + } + + /* + * this check would have been required after the loop to handle + * last group of strings, but due to <= condition in a loop + * we avoid that duplication + */ + if (grp_used) { + int new_off = p - tmp_strs; + __u32 len = strlen(strs.ptrs[grp_idx].str); + + memmove(p, strs.ptrs[grp_idx].str, len + 1); + for (j = grp_idx; j < i; j++) + strs.ptrs[j].new_off = new_off; + p += len + 1; + } + + if (i < strs.cnt) { + grp_idx = i; + grp_used = strs.ptrs[i].used; + } + } + + /* replace original strings with deduped ones */ + d->btf->hdr->str_len = p - tmp_strs; + memmove(start, tmp_strs, d->btf->hdr->str_len); + end = start + d->btf->hdr->str_len; + + /* restore original order for further binary search lookups */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); + + /* remap string offsets */ + err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + if (err) + goto done; + + d->btf->hdr->str_len = end - start; + +done: + free(tmp_strs); + free(strs.ptrs); + return err; +} + +static __u32 btf_hash_common(struct btf_type *t) +{ + __u32 h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + h = hash_combine(h, t->size); + return h; +} + +static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info && + t1->size == t2->size; +} + +/* Calculate type signature hash of INT. */ +static __u32 btf_hash_int(struct btf_type *t) +{ + __u32 info = *(__u32 *)(t + 1); + __u32 h; + + h = btf_hash_common(t); + h = hash_combine(h, info); + return h; +} + +/* Check structural equality of two INTs. */ +static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +{ + __u32 info1, info2; + + if (!btf_equal_common(t1, t2)) + return false; + info1 = *(__u32 *)(t1 + 1); + info2 = *(__u32 *)(t2 + 1); + return info1 == info2; +} + +/* Calculate type signature hash of ENUM. */ +static __u32 btf_hash_enum(struct btf_type *t) +{ + struct btf_enum *member = (struct btf_enum *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->val); + member++; + } + return h; +} + +/* Check structural equality of two ENUMs. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_enum *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_enum *)(t1 + 1); + m2 = (struct btf_enum *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val != m2->val) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static __u32 btf_hash_struct(struct btf_type *t) +{ + struct btf_member *member = (struct btf_member *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->offset); + /* no hashing of referenced type ID, it can be unresolved yet */ + member++; + } + return h; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_member *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_member *)(t1 + 1); + m2 = (struct btf_member *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->offset != m2->offset) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of ARRAY, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static __u32 btf_hash_array(struct btf_type *t) +{ + struct btf_array *info = (struct btf_array *)(t + 1); + __u32 h = btf_hash_common(t); + + h = hash_combine(h, info->type); + h = hash_combine(h, info->index_type); + h = hash_combine(h, info->nelems); + return h; +} + +/* + * Check exact equality of two ARRAYs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * ARRAY to potential canonical representative. + */ +static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->type == info2->type && + info1->index_type == info2->index_type && + info1->nelems == info2->nelems; +} + +/* + * Check structural compatibility of two ARRAYs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->nelems == info2->nelems; +} + +/* + * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static inline __u32 btf_hash_fnproto(struct btf_type *t) +{ + struct btf_param *member = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->type); + member++; + } + return h; +} + +/* + * Check exact equality of two FUNC_PROTOs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * FUNC_PROTO to potential canonical representative. + */ +static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->type != m2->type) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + /* skip return type ID */ + if (t1->name_off != t2->name_off || t1->info != t2->info) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Deduplicate primitive types, that can't reference other types, by calculating + * their type signature hash and comparing them with any possible canonical + * candidate. If no canonical candidate matches, type itself is marked as + * canonical and is added into `btf_dedup->dedup_table` as another candidate. + */ +static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + struct btf_type *cand; + struct btf_dedup_node *cand_node; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u32 h; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FUNC: + case BTF_KIND_FUNC_PROTO: + return 0; + + case BTF_KIND_INT: + h = btf_hash_int(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_int(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_enum(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_FWD: + h = btf_hash_common(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_prim_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Check whether type is already mapped into canonical one (could be to itself). + */ +static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) +{ + return d->map[type_id] <= BTF_MAX_NR_TYPES; +} + +/* + * Resolve type ID into its canonical type ID, if any; otherwise return original + * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow + * STRUCT/UNION link and resolve it into canonical type ID as well. + */ +static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) +{ + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + return type_id; +} + +/* + * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original + * type ID. + */ +static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) +{ + __u32 orig_type_id = type_id; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + return orig_type_id; +} + + +static inline __u16 btf_fwd_kind(struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT; +} + +/* + * Check equivalence of BTF type graph formed by candidate struct/union (we'll + * call it "candidate graph" in this description for brevity) to a type graph + * formed by (potential) canonical struct/union ("canonical graph" for brevity + * here, though keep in mind that not all types in canonical graph are + * necessarily canonical representatives themselves, some of them might be + * duplicates or its uniqueness might not have been established yet). + * Returns: + * - >0, if type graphs are equivalent; + * - 0, if not equivalent; + * - <0, on error. + * + * Algorithm performs side-by-side DFS traversal of both type graphs and checks + * equivalence of BTF types at each step. If at any point BTF types in candidate + * and canonical graphs are not compatible structurally, whole graphs are + * incompatible. If types are structurally equivalent (i.e., all information + * except referenced type IDs is exactly the same), a mapping from `canon_id` to + * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * If a type references other types, then those referenced types are checked + * for equivalence recursively. + * + * During DFS traversal, if we find that for current `canon_id` type we + * already have some mapping in hypothetical map, we check for two possible + * situations: + * - `canon_id` is mapped to exactly the same type as `cand_id`. This will + * happen when type graphs have cycles. In this case we assume those two + * types are equivalent. + * - `canon_id` is mapped to different type. This is contradiction in our + * hypothetical mapping, because same graph in canonical graph corresponds + * to two different types in candidate graph, which for equivalent type + * graphs shouldn't happen. This condition terminates equivalence check + * with negative result. + * + * If type graphs traversal exhausts types to check and find no contradiction, + * then type graphs are equivalent. + * + * When checking types for equivalence, there is one special case: FWD types. + * If FWD type resolution is allowed and one of the types (either from canonical + * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind + * flag) and their names match, hypothetical mapping is updated to point from + * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully, + * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently. + * + * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, + * if there are two exactly named (or anonymous) structs/unions that are + * compatible structurally, one of which has FWD field, while other is concrete + * STRUCT/UNION, but according to C sources they are different structs/unions + * that are referencing different types with the same name. This is extremely + * unlikely to happen, but btf_dedup API allows to disable FWD resolution if + * this logic is causing problems. + * + * Doing FWD resolution means that both candidate and/or canonical graphs can + * consists of portions of the graph that come from multiple compilation units. + * This is due to the fact that types within single compilation unit are always + * deduplicated and FWDs are already resolved, if referenced struct/union + * definiton is available. So, if we had unresolved FWD and found corresponding + * STRUCT/UNION, they will be from different compilation units. This + * consequently means that when we "link" FWD to corresponding STRUCT/UNION, + * type graph will likely have at least two different BTF types that describe + * same type (e.g., most probably there will be two different BTF types for the + * same 'int' primitive type) and could even have "overlapping" parts of type + * graph that describe same subset of types. + * + * This in turn means that our assumption that each type in canonical graph + * must correspond to exactly one type in candidate graph might not hold + * anymore and will make it harder to detect contradictions using hypothetical + * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION + * resolution only in canonical graph. FWDs in candidate graphs are never + * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs + * that can occur: + * - Both types in canonical and candidate graphs are FWDs. If they are + * structurally equivalent, then they can either be both resolved to the + * same STRUCT/UNION or not resolved at all. In both cases they are + * equivalent and there is no need to resolve FWD on candidate side. + * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, + * so nothing to resolve as well, algorithm will check equivalence anyway. + * - Type in canonical graph is FWD, while type in candidate is concrete + * STRUCT/UNION. In this case candidate graph comes from single compilation + * unit, so there is exactly one BTF type for each unique C type. After + * resolving FWD into STRUCT/UNION, there might be more than one BTF type + * in canonical graph mapping to single BTF type in candidate graph, but + * because hypothetical mapping maps from canonical to candidate types, it's + * alright, and we still maintain the property of having single `canon_id` + * mapping to single `cand_id` (there could be two different `canon_id` + * mapped to the same `cand_id`, but it's not contradictory). + * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate + * graph is FWD. In this case we are just going to check compatibility of + * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll + * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to + * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs + * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from + * canonical graph. + */ +static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, + __u32 canon_id) +{ + struct btf_type *cand_type; + struct btf_type *canon_type; + __u32 hypot_type_id; + __u16 cand_kind; + __u16 canon_kind; + int i, eq; + + /* if both resolve to the same canonical, they must be equivalent */ + if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) + return 1; + + canon_id = resolve_fwd_id(d, canon_id); + + hypot_type_id = d->hypot_map[canon_id]; + if (hypot_type_id <= BTF_MAX_NR_TYPES) + return hypot_type_id == cand_id; + + if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) + return -ENOMEM; + + cand_type = d->btf->types[cand_id]; + canon_type = d->btf->types[canon_id]; + cand_kind = BTF_INFO_KIND(cand_type->info); + canon_kind = BTF_INFO_KIND(canon_type->info); + + if (cand_type->name_off != canon_type->name_off) + return 0; + + /* FWD <--> STRUCT/UNION equivalence check, if enabled */ + if (!d->opts.dont_resolve_fwds + && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + && cand_kind != canon_kind) { + __u16 real_kind; + __u16 fwd_kind; + + if (cand_kind == BTF_KIND_FWD) { + real_kind = canon_kind; + fwd_kind = btf_fwd_kind(cand_type); + } else { + real_kind = cand_kind; + fwd_kind = btf_fwd_kind(canon_type); + } + return fwd_kind == real_kind; + } + + if (cand_type->info != canon_type->info) + return 0; + + switch (cand_kind) { + case BTF_KIND_INT: + return btf_equal_int(cand_type, canon_type); + + case BTF_KIND_ENUM: + return btf_equal_enum(cand_type, canon_type); + + case BTF_KIND_FWD: + return btf_equal_common(cand_type, canon_type); + + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + + case BTF_KIND_ARRAY: { + struct btf_array *cand_arr, *canon_arr; + + if (!btf_compat_array(cand_type, canon_type)) + return 0; + cand_arr = (struct btf_array *)(cand_type + 1); + canon_arr = (struct btf_array *)(canon_type + 1); + eq = btf_dedup_is_equiv(d, + cand_arr->index_type, canon_arr->index_type); + if (eq <= 0) + return eq; + return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *cand_m, *canon_m; + __u16 vlen; + + if (!btf_shallow_equal_struct(cand_type, canon_type)) + return 0; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_m = (struct btf_member *)(cand_type + 1); + canon_m = (struct btf_member *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); + if (eq <= 0) + return eq; + cand_m++; + canon_m++; + } + + return 1; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *cand_p, *canon_p; + __u16 vlen; + + if (!btf_compat_fnproto(cand_type, canon_type)) + return 0; + eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + if (eq <= 0) + return eq; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_p = (struct btf_param *)(cand_type + 1); + canon_p = (struct btf_param *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); + if (eq <= 0) + return eq; + cand_p++; + canon_p++; + } + return 1; + } + + default: + return -EINVAL; + } + return 0; +} + +/* + * Use hypothetical mapping, produced by successful type graph equivalence + * check, to augment existing struct/union canonical mapping, where possible. + * + * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record + * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional: + * it doesn't matter if FWD type was part of canonical graph or candidate one, + * we are recording the mapping anyway. As opposed to carefulness required + * for struct/union correspondence mapping (described below), for FWD resolution + * it's not important, as by the time that FWD type (reference type) will be + * deduplicated all structs/unions will be deduped already anyway. + * + * Recording STRUCT/UNION mapping is purely a performance optimization and is + * not required for correctness. It needs to be done carefully to ensure that + * struct/union from candidate's type graph is not mapped into corresponding + * struct/union from canonical type graph that itself hasn't been resolved into + * canonical representative. The only guarantee we have is that canonical + * struct/union was determined as canonical and that won't change. But any + * types referenced through that struct/union fields could have been not yet + * resolved, so in case like that it's too early to establish any kind of + * correspondence between structs/unions. + * + * No canonical correspondence is derived for primitive types (they are already + * deduplicated completely already anyway) or reference types (they rely on + * stability of struct/union canonical relationship for equivalence checks). + */ +static void btf_dedup_merge_hypot_map(struct btf_dedup *d) +{ + __u32 cand_type_id, targ_type_id; + __u16 t_kind, c_kind; + __u32 t_id, c_id; + int i; + + for (i = 0; i < d->hypot_cnt; i++) { + cand_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[cand_type_id]; + t_id = resolve_type_id(d, targ_type_id); + c_id = resolve_type_id(d, cand_type_id); + t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info); + c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info); + /* + * Resolve FWD into STRUCT/UNION. + * It's ok to resolve FWD into STRUCT/UNION that's not yet + * mapped to canonical representative (as opposed to + * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because + * eventually that struct is going to be mapped and all resolved + * FWDs will automatically resolve to correct canonical + * representative. This will happen before ref type deduping, + * which critically depends on stability of these mapping. This + * stability is not a requirement for STRUCT/UNION equivalence + * checks, though. + */ + if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) + d->map[c_id] = t_id; + else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + d->map[t_id] = c_id; + + if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && + c_kind != BTF_KIND_FWD && + is_type_mapped(d, c_id) && + !is_type_mapped(d, t_id)) { + /* + * as a perf optimization, we can map struct/union + * that's part of type graph we just verified for + * equivalence. We can do that for struct/union that has + * canonical representative only, though. + */ + d->map[t_id] = c_id; + } + } +} + +/* + * Deduplicate struct/union types. + * + * For each struct/union type its type signature hash is calculated, taking + * into account type's name, size, number, order and names of fields, but + * ignoring type ID's referenced from fields, because they might not be deduped + * completely until after reference types deduplication phase. This type hash + * is used to iterate over all potential canonical types, sharing same hash. + * For each canonical candidate we check whether type graphs that they form + * (through referenced types in fields and so on) are equivalent using algorithm + * implemented in `btf_dedup_is_equiv`. If such equivalence is found and + * BTF_KIND_FWD resolution is allowed, then hypothetical mapping + * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence + * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to + * potentially map other structs/unions to their canonical representatives, + * if such relationship hasn't yet been established. This speeds up algorithm + * by eliminating some of the duplicate work. + * + * If no matching canonical representative was found, struct/union is marked + * as canonical for itself and is added into btf_dedup->dedup_table hash map + * for further look ups. + */ +static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *cand_type, *t; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u16 kind; + __u32 h; + + /* already deduped or is in process of deduping (loop detected) */ + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return 0; + + t = d->btf->types[type_id]; + kind = BTF_INFO_KIND(t->info); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + return 0; + + h = btf_hash_struct(t); + for_each_dedup_cand(d, h, cand_node) { + int eq; + + /* + * Even though btf_dedup_is_equiv() checks for + * btf_shallow_equal_struct() internally when checking two + * structs (unions) for equivalence, we need to guard here + * from picking matching FWD type as a dedup candidate. + * This can happen due to hash collision. In such case just + * relying on btf_dedup_is_equiv() would lead to potentially + * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because + * FWD and compatible STRUCT/UNION are considered equivalent. + */ + cand_type = d->btf->types[cand_node->type_id]; + if (!btf_shallow_equal_struct(t, cand_type)) + continue; + + btf_dedup_clear_hypot_map(d); + eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); + if (eq < 0) + return eq; + if (!eq) + continue; + new_id = cand_node->type_id; + btf_dedup_merge_hypot_map(d); + break; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_struct_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Deduplicate reference type. + * + * Once all primitive and struct/union types got deduplicated, we can easily + * deduplicate all other (reference) BTF types. This is done in two steps: + * + * 1. Resolve all referenced type IDs into their canonical type IDs. This + * resolution can be done either immediately for primitive or struct/union types + * (because they were deduped in previous two phases) or recursively for + * reference types. Recursion will always terminate at either primitive or + * struct/union type, at which point we can "unwind" chain of reference types + * one by one. There is no danger of encountering cycles because in C type + * system the only way to form type cycle is through struct/union, so any chain + * of reference types, even those taking part in a type cycle, will inevitably + * reach struct/union at some point. + * + * 2. Once all referenced type IDs are resolved into canonical ones, BTF type + * becomes "stable", in the sense that no further deduplication will cause + * any changes to it. With that, it's now possible to calculate type's signature + * hash (this time taking into account referenced type IDs) and loop over all + * potential canonical representatives. If no match was found, current type + * will become canonical representative of itself and will be added into + * btf_dedup->dedup_table as another possible canonical representative. + */ +static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *t, *cand; + /* if we don't find equivalent type, then we are representative type */ + __u32 new_id = type_id; + int ref_type_id; + __u32 h; + + if (d->map[type_id] == BTF_IN_PROGRESS_ID) + return -ELOOP; + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return resolve_type_id(d, type_id); + + t = d->btf->types[type_id]; + d->map[type_id] = BTF_IN_PROGRESS_ID; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_common(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ARRAY: { + struct btf_array *info = (struct btf_array *)(t + 1); + + ref_type_id = btf_dedup_ref_type(d, info->type); + if (ref_type_id < 0) + return ref_type_id; + info->type = ref_type_id; + + ref_type_id = btf_dedup_ref_type(d, info->index_type); + if (ref_type_id < 0) + return ref_type_id; + info->index_type = ref_type_id; + + h = btf_hash_array(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_array(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param; + __u16 vlen; + int i; + + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + vlen = BTF_INFO_VLEN(t->info); + param = (struct btf_param *)(t + 1); + for (i = 0; i < vlen; i++) { + ref_type_id = btf_dedup_ref_type(d, param->type); + if (ref_type_id < 0) + return ref_type_id; + param->type = ref_type_id; + param++; + } + + h = btf_hash_fnproto(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_fnproto(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return new_id; +} + +static int btf_dedup_ref_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, i); + if (err < 0) + return err; + } + btf_dedup_table_free(d); + return 0; +} + +/* + * Compact types. + * + * After we established for each type its corresponding canonical representative + * type, we now can eliminate types that are not canonical and leave only + * canonical ones layed out sequentially in memory by copying them over + * duplicates. During compaction btf_dedup->hypot_map array is reused to store + * a map from original type ID to a new compacted type ID, which will be used + * during next phase to "fix up" type IDs, referenced from struct/union and + * reference types. + */ +static int btf_dedup_compact_types(struct btf_dedup *d) +{ + struct btf_type **new_types; + __u32 next_type_id = 1; + char *types_start, *p; + int i, len; + + /* we are going to reuse hypot_map to store compaction remapping */ + d->hypot_map[0] = 0; + for (i = 1; i <= d->btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + types_start = d->btf->nohdr_data + d->btf->hdr->type_off; + p = types_start; + + for (i = 1; i <= d->btf->nr_types; i++) { + if (d->map[i] != i) + continue; + + len = btf_type_size(d->btf->types[i]); + if (len < 0) + return len; + + memmove(p, d->btf->types[i], len); + d->hypot_map[i] = next_type_id; + d->btf->types[next_type_id] = (struct btf_type *)p; + p += len; + next_type_id++; + } + + /* shrink struct btf's internal types index and update btf_header */ + d->btf->nr_types = next_type_id - 1; + d->btf->types_size = d->btf->nr_types; + d->btf->hdr->type_len = p - types_start; + new_types = realloc(d->btf->types, + (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + if (!new_types) + return -ENOMEM; + d->btf->types = new_types; + + /* make sure string section follows type information without gaps */ + d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; + memmove(p, d->btf->strings, d->btf->hdr->str_len); + d->btf->strings = p; + p += d->btf->hdr->str_len; + + d->btf->data_size = p - (char *)d->btf->data; + return 0; +} + +/* + * Figure out final (deduplicated and compacted) type ID for provided original + * `type_id` by first resolving it into corresponding canonical type ID and + * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, + * which is populated during compaction phase. + */ +static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) +{ + __u32 resolved_type_id, new_type_id; + + resolved_type_id = resolve_type_id(d, type_id); + new_type_id = d->hypot_map[resolved_type_id]; + if (new_type_id > BTF_MAX_NR_TYPES) + return -EINVAL; + return new_type_id; +} + +/* + * Remap referenced type IDs into deduped type IDs. + * + * After BTF types are deduplicated and compacted, their final type IDs may + * differ from original ones. The map from original to a corresponding + * deduped type ID is stored in btf_dedup->hypot_map and is populated during + * compaction phase. During remapping phase we are rewriting all type IDs + * referenced from any BTF type (e.g., struct fields, func proto args, etc) to + * their final deduped type IDs. + */ +static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + int i, r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + break; + + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + break; + + case BTF_KIND_ARRAY: { + struct btf_array *arr_info = (struct btf_array *)(t + 1); + + r = btf_dedup_remap_type_id(d, arr_info->type); + if (r < 0) + return r; + arr_info->type = r; + r = btf_dedup_remap_type_id(d, arr_info->index_type); + if (r < 0) + return r; + arr_info->index_type = r; + break; + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *member = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, member->type); + if (r < 0) + return r; + member->type = r; + member++; + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, param->type); + if (r < 0) + return r; + param->type = r; + param++; + } + break; + } + + default: + return -EINVAL; + } + + return 0; +} + +static int btf_dedup_remap_types(struct btf_dedup *d) +{ + int i, r; + + for (i = 1; i <= d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, i); + if (r < 0) + return r; + } + return 0; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b77e7080f7e7..28a1e1e59861 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -6,27 +6,99 @@ #include <linux/types.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif #define BTF_ELF_SEC ".BTF" +#define BTF_EXT_ELF_SEC ".BTF.ext" struct btf; +struct btf_ext; struct btf_type; -typedef int (*btf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); +/* + * The .BTF.ext ELF section layout defined as + * struct btf_ext_header + * func_info subsection + * + * The func_info subsection layout: + * record size for struct bpf_func_info in the func_info subsection + * struct btf_sec_func_info for section #1 + * a list of bpf_func_info records for section #1 + * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h + * but may not be identical + * struct btf_sec_func_info for section #2 + * a list of bpf_func_info records for section #2 + * ...... + * + * Note that the bpf_func_info record size in .BTF.ext may not + * be the same as the one defined in include/uapi/linux/bpf.h. + * The loader should ensure that record_size meets minimum + * requirement and pass the record as is to the kernel. The + * kernel will handle the func_info properly based on its contents. + */ +struct btf_ext_header { + __u16 magic; + __u8 version; + __u8 flags; + __u32 hdr_len; + + /* All offsets are in bytes relative to the end of this header */ + __u32 func_info_off; + __u32 func_info_len; + __u32 line_info_off; + __u32 line_info_len; +}; LIBBPF_API void btf__free(struct btf *btf); -LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, + __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id); + +LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, + __u32 *size); +LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt); +LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); + +struct btf_dedup_opts { + unsigned int dedup_table_size; + bool dont_resolve_fwds; +}; + +LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); + +#ifdef __cplusplus +} /* extern "C" */ +#endif #endif /* __LIBBPF_BTF_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b607be7236d3..f5eb60379c8d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9,7 +9,9 @@ * Copyright (C) 2017 Nicira, Inc. */ +#ifndef _GNU_SOURCE #define _GNU_SOURCE +#endif #include <stdlib.h> #include <stdio.h> #include <stdarg.h> @@ -24,6 +26,7 @@ #include <linux/kernel.h> #include <linux/bpf.h> #include <linux/btf.h> +#include <linux/filter.h> #include <linux/list.h> #include <linux/limits.h> #include <linux/perf_event.h> @@ -39,6 +42,7 @@ #include "bpf.h" #include "btf.h" #include "str_error.h" +#include "libbpf_util.h" #ifndef EM_BPF #define EM_BPF 247 @@ -50,39 +54,33 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) -__printf(1, 2) -static int __base_pr(const char *format, ...) +static int __base_pr(enum libbpf_print_level level, const char *format, + va_list args) { - va_list args; - int err; + if (level == LIBBPF_DEBUG) + return 0; - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); } -static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_debug; +static libbpf_print_fn_t __libbpf_pr = __base_pr; -#define __pr(func, fmt, ...) \ -do { \ - if ((func)) \ - (func)("libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) - -#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) +void libbpf_set_print(libbpf_print_fn_t fn) +{ + __libbpf_pr = fn; +} -void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug) +__printf(2, 3) +void libbpf_print(enum libbpf_print_level level, const char *format, ...) { - __pr_warning = warn; - __pr_info = info; - __pr_debug = debug; + va_list args; + + if (!__libbpf_pr) + return; + + va_start(args, format); + __libbpf_pr(level, format, args); + va_end(args); } #define STRERR_BUFSIZE 128 @@ -114,6 +112,11 @@ void libbpf_set_print(libbpf_print_fn_t warn, # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +struct bpf_capabilities { + /* v4.14: kernel support for program & map names. */ + __u32 name:1; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -124,6 +127,10 @@ struct bpf_program { char *name; int prog_ifindex; char *section_name; + /* section_name with / replaced by _; makes recursive pinning + * in bpf_object__pin_programs easier + */ + char *pin_name; struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; @@ -152,6 +159,16 @@ struct bpf_program { bpf_program_clear_priv_t clear_priv; enum bpf_attach_type expected_attach_type; + int btf_fd; + void *func_info; + __u32 func_info_rec_size; + __u32 func_info_cnt; + + struct bpf_capabilities *caps; + + void *line_info; + __u32 line_info_rec_size; + __u32 line_info_cnt; }; struct bpf_map { @@ -159,6 +176,7 @@ struct bpf_map { char *name; size_t offset; int map_ifindex; + int inner_map_fd; struct bpf_map_def def; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -208,10 +226,13 @@ struct bpf_object { struct list_head list; struct btf *btf; + struct btf_ext *btf_ext; void *priv; bpf_object_clear_priv_t clear_priv; + struct bpf_capabilities caps; + char path[]; }; #define obj_elf_valid(o) ((o)->efile.elf) @@ -237,6 +258,10 @@ void bpf_program__unload(struct bpf_program *prog) prog->instances.nr = -1; zfree(&prog->instances.fds); + + zclose(prog->btf_fd); + zfree(&prog->func_info); + zfree(&prog->line_info); } static void bpf_program__exit(struct bpf_program *prog) @@ -253,6 +278,7 @@ static void bpf_program__exit(struct bpf_program *prog) bpf_program__unload(prog); zfree(&prog->name); zfree(&prog->section_name); + zfree(&prog->pin_name); zfree(&prog->insns); zfree(&prog->reloc_desc); @@ -261,6 +287,17 @@ static void bpf_program__exit(struct bpf_program *prog) prog->idx = -1; } +static char *__bpf_program__pin_name(struct bpf_program *prog) +{ + char *name, *p; + + name = p = strdup(prog->section_name); + while ((p = strchr(p, '/'))) + *p = '_'; + + return name; +} + static int bpf_program__init(void *data, size_t size, char *section_name, int idx, struct bpf_program *prog) @@ -270,7 +307,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, return -EINVAL; } - bzero(prog, sizeof(*prog)); + memset(prog, 0, sizeof(*prog)); prog->section_name = strdup(section_name); if (!prog->section_name) { @@ -279,6 +316,13 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, goto errout; } + prog->pin_name = __bpf_program__pin_name(prog); + if (!prog->pin_name) { + pr_warning("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); + goto errout; + } + prog->insns = malloc(size); if (!prog->insns) { pr_warning("failed to alloc insns for prog under section %s\n", @@ -291,7 +335,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->idx = idx; prog->instances.fds = NULL; prog->instances.nr = -1; - prog->type = BPF_PROG_TYPE_KPROBE; + prog->type = BPF_PROG_TYPE_UNSPEC; + prog->btf_fd = -1; return 0; errout: @@ -310,6 +355,7 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, if (err) return err; + prog.caps = &obj->caps; progs = obj->programs; nr_progs = obj->nr_programs; @@ -562,6 +608,14 @@ static int compare_bpf_map(const void *_a, const void *_b) return a->offset - b->offset; } +static bool bpf_map_type__is_map_in_map(enum bpf_map_type type) +{ + if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + type == BPF_MAP_TYPE_HASH_OF_MAPS) + return true; + return false; +} + static int bpf_object__init_maps(struct bpf_object *obj, int flags) { @@ -625,13 +679,15 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) } obj->nr_maps = nr_maps; - /* - * fill all fd with -1 so won't close incorrect - * fd (fd=0 is stdin) when failure (zclose won't close - * negative fd)). - */ - for (i = 0; i < nr_maps; i++) + for (i = 0; i < nr_maps; i++) { + /* + * fill all fd with -1 so won't close incorrect + * fd (fd=0 is stdin) when failure (zclose won't close + * negative fd)). + */ obj->maps[i].fd = -1; + obj->maps[i].inner_map_fd = -1; + } /* * Fill obj->maps using data in "maps" section. @@ -723,6 +779,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; + Elf_Data *btf_ext_data = NULL; Elf_Scn *scn = NULL; int idx = 0, err = 0; @@ -777,13 +834,14 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size, - __pr_debug); - if (IS_ERR(obj->btf)) { + obj->btf = btf__new(data->d_buf, data->d_size); + if (IS_ERR(obj->btf) || btf__load(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; } + } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { + btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { pr_warning("bpf: multiple SYMTAB in %s\n", @@ -845,6 +903,21 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) pr_warning("Corrupted ELF file: index of strtab invalid\n"); return LIBBPF_ERRNO__FORMAT; } + if (btf_ext_data) { + if (!obj->btf) { + pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n", + BTF_EXT_ELF_SEC, BTF_ELF_SEC); + } else { + obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, + btf_ext_data->d_size); + if (IS_ERR(obj->btf_ext)) { + pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, + PTR_ERR(obj->btf_ext)); + obj->btf_ext = NULL; + } + } + } if (obj->efile.maps_shndx >= 0) { err = bpf_object__init_maps(obj, flags); if (err) @@ -977,72 +1050,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { - const struct btf_type *container_type; - const struct btf_member *key, *value; struct bpf_map_def *def = &map->def; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; - - if (snprintf(container_name, max_name, "____btf_map_%s", map->name) == - max_name) { - pr_warning("map:%s length of '____btf_map_%s' is too long\n", - map->name, map->name); - return -EINVAL; - } - - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map->name, container_name); - return container_id; - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warning("map:%s cannot find BTF type for container_id:%u\n", - map->name, container_id); - return -EINVAL; - } - - if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || - BTF_INFO_VLEN(container_type->info) < 2) { - pr_warning("map:%s container_name:%s is an invalid container struct\n", - map->name, container_name); - return -EINVAL; - } - - key = (struct btf_member *)(container_type + 1); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warning("map:%s invalid BTF key_type_size\n", - map->name); - return key_size; - } - - if (def->key_size != key_size) { - pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map->name, (__u32)key_size, def->key_size); - return -EINVAL; - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warning("map:%s invalid BTF value_type_size\n", map->name); - return value_size; - } + __u32 key_type_id, value_type_id; + int ret; - if (def->value_size != value_size) { - pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map->name, (__u32)value_size, def->value_size); - return -EINVAL; - } + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + if (ret) + return ret; - map->btf_key_type_id = key->type; - map->btf_value_type_id = value->type; + map->btf_key_type_id = key_type_id; + map->btf_value_type_id = value_type_id; return 0; } @@ -1094,6 +1113,66 @@ err_free_new_name: return -errno; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + /* If map already created, its attributes can't be changed. */ + if (map->fd >= 0) + return -EBUSY; + + map->def.max_entries = max_entries; + + return 0; +} + +static int +bpf_object__probe_name(struct bpf_object *obj) +{ + struct bpf_load_program_attr attr; + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int ret; + + /* make sure basic loading works */ + + memset(&attr, 0, sizeof(attr)); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.insns = insns; + attr.insns_cnt = ARRAY_SIZE(insns); + attr.license = "GPL"; + + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret < 0) { + cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); + return -errno; + } + close(ret); + + /* now try the same program, but with the name */ + + attr.name = "test"; + ret = bpf_load_program_xattr(&attr, NULL, 0); + if (ret >= 0) { + obj->caps.name = 1; + close(ret); + } + + return 0; +} + +static int +bpf_object__probe_caps(struct bpf_object *obj) +{ + return bpf_object__probe_name(obj); +} + static int bpf_object__create_maps(struct bpf_object *obj) { @@ -1113,7 +1192,8 @@ bpf_object__create_maps(struct bpf_object *obj) continue; } - create_attr.name = map->name; + if (obj->caps.name) + create_attr.name = map->name; create_attr.map_ifindex = map->map_ifindex; create_attr.map_type = def->type; create_attr.map_flags = def->map_flags; @@ -1123,6 +1203,9 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; + if (bpf_map_type__is_map_in_map(def->type) && + map->inner_map_fd >= 0) + create_attr.inner_map_fd = map->inner_map_fd; if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) { create_attr.btf_fd = btf__fd(obj->btf); @@ -1161,12 +1244,89 @@ bpf_object__create_maps(struct bpf_object *obj) } static int +check_btf_ext_reloc_err(struct bpf_program *prog, int err, + void *btf_prog_info, const char *info_name) +{ + if (err != -ENOENT) { + pr_warning("Error in loading %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */ + + if (btf_prog_info) { + /* + * Some info has already been found but has problem + * in the last btf_ext reloc. Must have to error + * out. + */ + pr_warning("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); + return err; + } + + /* + * Have problem loading the very first info. Ignore + * the rest. + */ + pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); + return 0; +} + +static int +bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj, + const char *section_name, __u32 insn_offset) +{ + int err; + + if (!insn_offset || prog->func_info) { + /* + * !insn_offset => main program + * + * For sub prog, the main program's func_info has to + * be loaded first (i.e. prog->func_info != NULL) + */ + err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->func_info, + &prog->func_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->func_info, + "bpf_func_info"); + + prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext); + } + + if (!insn_offset || prog->line_info) { + err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext, + section_name, insn_offset, + &prog->line_info, + &prog->line_info_cnt); + if (err) + return check_btf_ext_reloc_err(prog, err, + prog->line_info, + "bpf_line_info"); + + prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); + } + + if (!insn_offset) + prog->btf_fd = btf__fd(obj->btf); + + return 0; +} + +static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) { struct bpf_insn *insn, *new_insn; struct bpf_program *text; size_t new_cnt; + int err; if (relo->type != RELO_CALL) return -LIBBPF_ERRNO__RELOC; @@ -1189,6 +1349,15 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, pr_warning("oom in prog realloc\n"); return -ENOMEM; } + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + text->section_name, + prog->insns_cnt); + if (err) + return err; + } + memcpy(new_insn + prog->insns_cnt, text->insns, text->insns_cnt * sizeof(*insn)); prog->insns = new_insn; @@ -1208,7 +1377,17 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) { int i, err; - if (!prog || !prog->reloc_desc) + if (!prog) + return 0; + + if (obj->btf_ext) { + err = bpf_program_reloc_btf_ext(prog, obj, + prog->section_name, 0); + if (err) + return err; + } + + if (!prog->reloc_desc) return 0; for (i = 0; i < prog->nr_reloc; i++) { @@ -1296,9 +1475,8 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) } static int -load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, - const char *name, struct bpf_insn *insns, int insns_cnt, - char *license, __u32 kern_version, int *pfd, int prog_ifindex) +load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, + char *license, __u32 kern_version, int *pfd) { struct bpf_load_program_attr load_attr; char *cp, errmsg[STRERR_BUFSIZE]; @@ -1306,15 +1484,22 @@ load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type, int ret; memset(&load_attr, 0, sizeof(struct bpf_load_program_attr)); - load_attr.prog_type = type; - load_attr.expected_attach_type = expected_attach_type; - load_attr.name = name; + load_attr.prog_type = prog->type; + load_attr.expected_attach_type = prog->expected_attach_type; + if (prog->caps->name) + load_attr.name = prog->name; load_attr.insns = insns; load_attr.insns_cnt = insns_cnt; load_attr.license = license; load_attr.kern_version = kern_version; - load_attr.prog_ifindex = prog_ifindex; - + load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + load_attr.func_info = prog->func_info; + load_attr.func_info_rec_size = prog->func_info_rec_size; + load_attr.func_info_cnt = prog->func_info_cnt; + load_attr.line_info = prog->line_info; + load_attr.line_info_rec_size = prog->line_info_rec_size; + load_attr.line_info_cnt = prog->line_info_cnt; if (!load_attr.insns || !load_attr.insns_cnt) return -EINVAL; @@ -1394,10 +1579,8 @@ bpf_program__load(struct bpf_program *prog, pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", prog->section_name, prog->instances.nr); } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, prog->insns, prog->insns_cnt, - license, kern_version, &fd, - prog->prog_ifindex); + err = load_program(prog, prog->insns, prog->insns_cnt, + license, kern_version, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -1407,7 +1590,7 @@ bpf_program__load(struct bpf_program *prog, struct bpf_prog_prep_result result; bpf_program_prep_t preprocessor = prog->preprocessor; - bzero(&result, sizeof(result)); + memset(&result, 0, sizeof(result)); err = preprocessor(prog, i, prog->insns, prog->insns_cnt, &result); if (err) { @@ -1425,11 +1608,9 @@ bpf_program__load(struct bpf_program *prog, continue; } - err = load_program(prog->type, prog->expected_attach_type, - prog->name, result.new_insn_ptr, + err = load_program(prog, result.new_insn_ptr, result.new_insn_cnt, - license, kern_version, &fd, - prog->prog_ifindex); + license, kern_version, &fd); if (err) { pr_warning("Loading the %dth instance of program '%s' failed\n", @@ -1495,12 +1676,12 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) case BPF_PROG_TYPE_LIRC_MODE2: case BPF_PROG_TYPE_SK_REUSEPORT: case BPF_PROG_TYPE_FLOW_DISSECTOR: - return false; case BPF_PROG_TYPE_UNSPEC: - case BPF_PROG_TYPE_KPROBE: case BPF_PROG_TYPE_TRACEPOINT: - case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + return false; + case BPF_PROG_TYPE_KPROBE: default: return true; } @@ -1627,6 +1808,7 @@ int bpf_object__load(struct bpf_object *obj) obj->loaded = true; + CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__create_maps(obj), err, out); CHECK_ERR(bpf_object__relocate(obj), err, out); CHECK_ERR(bpf_object__load_progs(obj), err, out); @@ -1699,6 +1881,34 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return 0; } +int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, + int instance) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (instance < 0 || instance >= prog->instances.nr) { + pr_warning("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned program '%s'\n", path); + + return 0; +} + static int make_dir(const char *path) { char *cp, errmsg[STRERR_BUFSIZE]; @@ -1733,6 +1943,11 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return -EINVAL; } + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__pin_instance(prog, path, 0); + } + err = make_dir(path); if (err) return err; @@ -1742,16 +1957,83 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) int len; len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) { + err = -EINVAL; + goto err_unpin; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin; + } + + err = bpf_program__pin_instance(prog, buf, i); + if (err) + goto err_unpin; + } + + return 0; + +err_unpin: + for (i = i - 1; i >= 0; i--) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin_instance(prog, buf, i); + } + + rmdir(path); + + return err; +} + +int bpf_program__unpin(struct bpf_program *prog, const char *path) +{ + int i, err; + + err = check_path(path); + if (err) + return err; + + if (prog == NULL) { + pr_warning("invalid program pointer\n"); + return -EINVAL; + } + + if (prog->instances.nr <= 0) { + pr_warning("no instances of prog %s to pin\n", + prog->section_name); + return -EINVAL; + } + + if (prog->instances.nr == 1) { + /* don't create subdirs when pinning single instance */ + return bpf_program__unpin_instance(prog, path, 0); + } + + for (i = 0; i < prog->instances.nr; i++) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%d", path, i); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin_instance(prog, buf, i); + err = bpf_program__unpin_instance(prog, buf, i); if (err) return err; } + err = rmdir(path); + if (err) + return -errno; + return 0; } @@ -1776,12 +2058,33 @@ int bpf_map__pin(struct bpf_map *map, const char *path) } pr_debug("pinned map '%s'\n", path); + return 0; } -int bpf_object__pin(struct bpf_object *obj, const char *path) +int bpf_map__unpin(struct bpf_map *map, const char *path) +{ + int err; + + err = check_path(path); + if (err) + return err; + + if (map == NULL) { + pr_warning("invalid map pointer\n"); + return -EINVAL; + } + + err = unlink(path); + if (err != 0) + return -errno; + pr_debug("unpinned map '%s'\n", path); + + return 0; +} + +int bpf_object__pin_maps(struct bpf_object *obj, const char *path) { - struct bpf_program *prog; struct bpf_map *map; int err; @@ -1797,7 +2100,54 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) if (err) return err; - bpf_map__for_each(map, obj) { + bpf_object__for_each_map(map, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) { + err = -EINVAL; + goto err_unpin_maps; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_maps; + } + + err = bpf_map__pin(map, buf); + if (err) + goto err_unpin_maps; + } + + return 0; + +err_unpin_maps: + while ((map = bpf_map__prev(map, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + bpf_map__name(map)); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_map__unpin(map, buf); + } + + return err; +} + +int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) +{ + struct bpf_map *map; + int err; + + if (!obj) + return -ENOENT; + + bpf_object__for_each_map(map, obj) { char buf[PATH_MAX]; int len; @@ -1808,23 +2158,90 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_map__pin(map, buf); + err = bpf_map__unpin(map, buf); if (err) return err; } + return 0; +} + +int bpf_object__pin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + if (!obj->loaded) { + pr_warning("object not yet loaded; load it first\n"); + return -ENOENT; + } + + err = make_dir(path); + if (err) + return err; + bpf_object__for_each_program(prog, obj) { char buf[PATH_MAX]; int len; len = snprintf(buf, PATH_MAX, "%s/%s", path, - prog->section_name); + prog->pin_name); + if (len < 0) { + err = -EINVAL; + goto err_unpin_programs; + } else if (len >= PATH_MAX) { + err = -ENAMETOOLONG; + goto err_unpin_programs; + } + + err = bpf_program__pin(prog, buf); + if (err) + goto err_unpin_programs; + } + + return 0; + +err_unpin_programs: + while ((prog = bpf_program__prev(prog, obj))) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); + if (len < 0) + continue; + else if (len >= PATH_MAX) + continue; + + bpf_program__unpin(prog, buf); + } + + return err; +} + +int bpf_object__unpin_programs(struct bpf_object *obj, const char *path) +{ + struct bpf_program *prog; + int err; + + if (!obj) + return -ENOENT; + + bpf_object__for_each_program(prog, obj) { + char buf[PATH_MAX]; + int len; + + len = snprintf(buf, PATH_MAX, "%s/%s", path, + prog->pin_name); if (len < 0) return -EINVAL; else if (len >= PATH_MAX) return -ENAMETOOLONG; - err = bpf_program__pin(prog, buf); + err = bpf_program__unpin(prog, buf); if (err) return err; } @@ -1832,6 +2249,23 @@ int bpf_object__pin(struct bpf_object *obj, const char *path) return 0; } +int bpf_object__pin(struct bpf_object *obj, const char *path) +{ + int err; + + err = bpf_object__pin_maps(obj, path); + if (err) + return err; + + err = bpf_object__pin_programs(obj, path); + if (err) { + bpf_object__unpin_maps(obj, path); + return err; + } + + return 0; +} + void bpf_object__close(struct bpf_object *obj) { size_t i; @@ -1845,6 +2279,7 @@ void bpf_object__close(struct bpf_object *obj) bpf_object__elf_finish(obj); bpf_object__unload(obj); btf__free(obj->btf); + btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { zfree(&obj->maps[i].name); @@ -1896,6 +2331,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj) return obj ? obj->kern_version : 0; } +struct btf *bpf_object__btf(struct bpf_object *obj) +{ + return obj ? obj->btf : NULL; +} + int bpf_object__btf_fd(const struct bpf_object *obj) { return obj->btf ? btf__fd(obj->btf) : -1; @@ -1918,23 +2358,26 @@ void *bpf_object__priv(struct bpf_object *obj) } static struct bpf_program * -__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) +__bpf_program__iter(struct bpf_program *p, struct bpf_object *obj, bool forward) { - size_t idx; + size_t nr_programs = obj->nr_programs; + ssize_t idx; - if (!obj->programs) + if (!nr_programs) return NULL; - /* First handler */ - if (prev == NULL) - return &obj->programs[0]; - if (prev->obj != obj) { + if (!p) + /* Iter from the beginning */ + return forward ? &obj->programs[0] : + &obj->programs[nr_programs - 1]; + + if (p->obj != obj) { pr_warning("error: program handler doesn't match object\n"); return NULL; } - idx = (prev - obj->programs) + 1; - if (idx >= obj->nr_programs) + idx = (p - obj->programs) + (forward ? 1 : -1); + if (idx >= obj->nr_programs || idx < 0) return NULL; return &obj->programs[idx]; } @@ -1945,7 +2388,19 @@ bpf_program__next(struct bpf_program *prev, struct bpf_object *obj) struct bpf_program *prog = prev; do { - prog = __bpf_program__next(prog, obj); + prog = __bpf_program__iter(prog, obj, true); + } while (prog && bpf_program__is_function_storage(prog, obj)); + + return prog; +} + +struct bpf_program * +bpf_program__prev(struct bpf_program *next, struct bpf_object *obj) +{ + struct bpf_program *prog = next; + + do { + prog = __bpf_program__iter(prog, obj, false); } while (prog && bpf_program__is_function_storage(prog, obj)); return prog; @@ -2084,19 +2539,19 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, atype) \ - { string, sizeof(string) - 1, ptype, eatype, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \ + { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype } /* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, -EINVAL) +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0) /* Programs that can be attached. */ #define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, 0, atype) + BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype) /* Programs that must specify expected attach type at load time. */ #define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype) + BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype) /* Programs that can be attached but attach type can't be identified by section * name. Kept for backward compatibility. @@ -2108,6 +2563,7 @@ static const struct { size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; + int is_attachable; enum bpf_attach_type attach_type; } section_names[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -2169,9 +2625,38 @@ static const struct { #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#define MAX_TYPE_NAME_SIZE 32 + +static char *libbpf_get_type_names(bool attach_type) +{ + int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + char *buf; + + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + /* Forge string buf with all available names */ + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (attach_type && !section_names[i].is_attachable) + continue; + + if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + free(buf); + return NULL; + } + strcat(buf, " "); + strcat(buf, section_names[i].sec); + } + + return buf; +} + int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + char *type_names; int i; if (!name) @@ -2184,12 +2669,20 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, *expected_attach_type = section_names[i].expected_attach_type; return 0; } + pr_warning("failed to guess program type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(false); + if (type_names != NULL) { + pr_info("supported section(type) names are:%s\n", type_names); + free(type_names); + } + return -EINVAL; } int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type) { + char *type_names; int i; if (!name) @@ -2198,11 +2691,18 @@ int libbpf_attach_type_by_name(const char *name, for (i = 0; i < ARRAY_SIZE(section_names); i++) { if (strncmp(name, section_names[i].sec, section_names[i].len)) continue; - if (section_names[i].attach_type == -EINVAL) + if (!section_names[i].is_attachable) return -EINVAL; *attach_type = section_names[i].attach_type; return 0; } + pr_warning("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_info("attachable section(type) names are:%s\n", type_names); + free(type_names); + } + return -EINVAL; } @@ -2271,10 +2771,24 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) map->map_ifindex = ifindex; } -struct bpf_map * -bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) { - size_t idx; + if (!bpf_map_type__is_map_in_map(map->def.type)) { + pr_warning("error: unsupported map type\n"); + return -EINVAL; + } + if (map->inner_map_fd != -1) { + pr_warning("error: inner_map_fd already specified\n"); + return -EINVAL; + } + map->inner_map_fd = fd; + return 0; +} + +static struct bpf_map * +__bpf_map__iter(struct bpf_map *m, struct bpf_object *obj, int i) +{ + ssize_t idx; struct bpf_map *s, *e; if (!obj || !obj->maps) @@ -2283,33 +2797,57 @@ bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) s = obj->maps; e = obj->maps + obj->nr_maps; - if (prev == NULL) - return s; - - if ((prev < s) || (prev >= e)) { + if ((m < s) || (m >= e)) { pr_warning("error in %s: map handler doesn't belong to object\n", __func__); return NULL; } - idx = (prev - obj->maps) + 1; - if (idx >= obj->nr_maps) + idx = (m - obj->maps) + i; + if (idx >= obj->nr_maps || idx < 0) return NULL; return &obj->maps[idx]; } struct bpf_map * +bpf_map__next(struct bpf_map *prev, struct bpf_object *obj) +{ + if (prev == NULL) + return obj->maps; + + return __bpf_map__iter(prev, obj, 1); +} + +struct bpf_map * +bpf_map__prev(struct bpf_map *next, struct bpf_object *obj) +{ + if (next == NULL) { + if (!obj->nr_maps) + return NULL; + return obj->maps + obj->nr_maps - 1; + } + + return __bpf_map__iter(next, obj, -1); +} + +struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) { struct bpf_map *pos; - bpf_map__for_each(pos, obj) { + bpf_object__for_each_map(pos, obj) { if (pos->name && !strcmp(pos->name, name)) return pos; } return NULL; } +int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name) +{ + return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); +} + struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) { @@ -2377,8 +2915,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, err = bpf_program__identify_section(prog, &prog_type, &expected_attach_type); if (err < 0) { - pr_warning("failed to guess program type based on section name %s\n", - prog->section_name); bpf_object__close(obj); return -EINVAL; } @@ -2392,7 +2928,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, first_prog = prog; } - bpf_map__for_each(map, obj) { + bpf_object__for_each_map(map, obj) { if (!bpf_map__is_offload_neutral(map)) map->map_ifindex = attr->ifindex; } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1f3468dad8b2..b4652aa1a58a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -16,6 +16,10 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#ifdef __cplusplus +extern "C" { +#endif + #ifndef LIBBPF_API #define LIBBPF_API __attribute__((visibility("default"))) #endif @@ -43,17 +47,16 @@ enum libbpf_errno { LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); -/* - * __printf is defined in include/linux/compiler-gcc.h. However, - * it would be better if libbpf.h didn't depend on Linux header files. - * So instead of __printf, here we use gcc attribute directly. - */ -typedef int (*libbpf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, +}; + +typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, + const char *, va_list ap); -LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug); +LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; @@ -71,6 +74,13 @@ struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, size_t obj_buf_sz, const char *name); +LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path); +LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj, + const char *path); +LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, + const char *path); LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); LIBBPF_API void bpf_object__close(struct bpf_object *object); @@ -79,6 +89,9 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); + +struct btf; +LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * @@ -112,6 +125,9 @@ LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog, (pos) != NULL; \ (pos) = bpf_program__next((pos), (obj))) +LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog, + struct bpf_object *obj); + typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *); @@ -131,7 +147,11 @@ LIBBPF_API int bpf_program__fd(struct bpf_program *prog); LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, const char *path, int instance); +LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog, + const char *path, + int instance); LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); +LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path); LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_insn; @@ -246,6 +266,9 @@ struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); +LIBBPF_API int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name); + /* * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. @@ -255,10 +278,14 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); LIBBPF_API struct bpf_map * bpf_map__next(struct bpf_map *map, struct bpf_object *obj); -#define bpf_map__for_each(pos, obj) \ +#define bpf_object__for_each_map(pos, obj) \ for ((pos) = bpf_map__next(NULL, (obj)); \ (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) +#define bpf_map__for_each bpf_object__for_each_map + +LIBBPF_API struct bpf_map * +bpf_map__prev(struct bpf_map *map, struct bpf_object *obj); LIBBPF_API int bpf_map__fd(struct bpf_map *map); LIBBPF_API const struct bpf_map_def *bpf_map__def(struct bpf_map *map); @@ -271,9 +298,13 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); +LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); + +LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd); LIBBPF_API long libbpf_get_error(const void *ptr); @@ -290,6 +321,7 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, @@ -317,4 +349,36 @@ int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + +struct bpf_prog_linfo; +struct bpf_prog_info; + +LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo); +LIBBPF_API struct bpf_prog_linfo * +bpf_prog_linfo__new(const struct bpf_prog_info *info); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo, + __u64 addr, __u32 func_idx, __u32 nr_skip); +LIBBPF_API const struct bpf_line_info * +bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, + __u32 insn_off, __u32 nr_skip); + +/* + * Probe for supported system features + * + * Note that running many of these probes in a short amount of time can cause + * the kernel to reach the maximal size of lockable memory allowed for the + * user, causing subsequent probes to fail. In this case, the caller may want + * to adjust that limit with setrlimit(). + */ +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, + __u32 ifindex); +LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, + enum bpf_prog_type prog_type, __u32 ifindex); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + #endif /* __LIBBPF_LIBBPF_H */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map new file mode 100644 index 000000000000..778a26702a70 --- /dev/null +++ b/tools/lib/bpf/libbpf.map @@ -0,0 +1,156 @@ +LIBBPF_0.0.1 { + global: + bpf_btf_get_fd_by_id; + bpf_create_map; + bpf_create_map_in_map; + bpf_create_map_in_map_node; + bpf_create_map_name; + bpf_create_map_node; + bpf_create_map_xattr; + bpf_load_btf; + bpf_load_program; + bpf_load_program_xattr; + bpf_map__btf_key_type_id; + bpf_map__btf_value_type_id; + bpf_map__def; + bpf_map__fd; + bpf_map__is_offload_neutral; + bpf_map__name; + bpf_map__next; + bpf_map__pin; + bpf_map__prev; + bpf_map__priv; + bpf_map__reuse_fd; + bpf_map__set_ifindex; + bpf_map__set_inner_map_fd; + bpf_map__set_priv; + bpf_map__unpin; + bpf_map_delete_elem; + bpf_map_get_fd_by_id; + bpf_map_get_next_id; + bpf_map_get_next_key; + bpf_map_lookup_and_delete_elem; + bpf_map_lookup_elem; + bpf_map_update_elem; + bpf_obj_get; + bpf_obj_get_info_by_fd; + bpf_obj_pin; + bpf_object__btf_fd; + bpf_object__close; + bpf_object__find_map_by_name; + bpf_object__find_map_by_offset; + bpf_object__find_program_by_title; + bpf_object__kversion; + bpf_object__load; + bpf_object__name; + bpf_object__next; + bpf_object__open; + bpf_object__open_buffer; + bpf_object__open_xattr; + bpf_object__pin; + bpf_object__pin_maps; + bpf_object__pin_programs; + bpf_object__priv; + bpf_object__set_priv; + bpf_object__unload; + bpf_object__unpin_maps; + bpf_object__unpin_programs; + bpf_perf_event_read_simple; + bpf_prog_attach; + bpf_prog_detach; + bpf_prog_detach2; + bpf_prog_get_fd_by_id; + bpf_prog_get_next_id; + bpf_prog_load; + bpf_prog_load_xattr; + bpf_prog_query; + bpf_prog_test_run; + bpf_prog_test_run_xattr; + bpf_program__fd; + bpf_program__is_kprobe; + bpf_program__is_perf_event; + bpf_program__is_raw_tracepoint; + bpf_program__is_sched_act; + bpf_program__is_sched_cls; + bpf_program__is_socket_filter; + bpf_program__is_tracepoint; + bpf_program__is_xdp; + bpf_program__load; + bpf_program__next; + bpf_program__nth_fd; + bpf_program__pin; + bpf_program__pin_instance; + bpf_program__prev; + bpf_program__priv; + bpf_program__set_expected_attach_type; + bpf_program__set_ifindex; + bpf_program__set_kprobe; + bpf_program__set_perf_event; + bpf_program__set_prep; + bpf_program__set_priv; + bpf_program__set_raw_tracepoint; + bpf_program__set_sched_act; + bpf_program__set_sched_cls; + bpf_program__set_socket_filter; + bpf_program__set_tracepoint; + bpf_program__set_type; + bpf_program__set_xdp; + bpf_program__title; + bpf_program__unload; + bpf_program__unpin; + bpf_program__unpin_instance; + bpf_prog_linfo__free; + bpf_prog_linfo__new; + bpf_prog_linfo__lfind_addr_func; + bpf_prog_linfo__lfind; + bpf_raw_tracepoint_open; + bpf_set_link_xdp_fd; + bpf_task_fd_query; + bpf_verify_program; + btf__fd; + btf__find_by_name; + btf__free; + btf__get_from_id; + btf__name_by_offset; + btf__new; + btf__resolve_size; + btf__resolve_type; + btf__type_by_id; + libbpf_attach_type_by_name; + libbpf_get_error; + libbpf_prog_type_by_name; + libbpf_set_print; + libbpf_strerror; + local: + *; +}; + +LIBBPF_0.0.2 { + global: + bpf_probe_helper; + bpf_probe_map_type; + bpf_probe_prog_type; + bpf_map__resize; + bpf_map_lookup_elem_flags; + bpf_object__btf; + bpf_object__find_map_fd_by_name; + bpf_get_link_xdp_id; + btf__dedup; + btf__get_map_kv_tids; + btf__get_nr_types; + btf__get_raw_data; + btf__load; + btf_ext__free; + btf_ext__func_info_rec_size; + btf_ext__get_raw_data; + btf_ext__line_info_rec_size; + btf_ext__new; + btf_ext__reloc_func_info; + btf_ext__reloc_line_info; + xsk_umem__create; + xsk_socket__create; + xsk_umem__delete; + xsk_socket__delete; + xsk_umem__fd; + xsk_socket__fd; +} LIBBPF_0.0.1; diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c index d83b17f8435c..4343e40588c6 100644 --- a/tools/lib/bpf/libbpf_errno.c +++ b/tools/lib/bpf/libbpf_errno.c @@ -7,6 +7,7 @@ * Copyright (C) 2017 Nicira, Inc. */ +#undef _GNU_SOURCE #include <stdio.h> #include <string.h> diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c new file mode 100644 index 000000000000..8c3a1c04dcb2 --- /dev/null +++ b/tools/lib/bpf/libbpf_probes.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Netronome Systems, Inc. */ + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <net/if.h> +#include <sys/utsname.h> + +#include <linux/filter.h> +#include <linux/kernel.h> + +#include "bpf.h" +#include "libbpf.h" + +static bool grep(const char *buffer, const char *pattern) +{ + return !!strstr(buffer, pattern); +} + +static int get_vendor_id(int ifindex) +{ + char ifname[IF_NAMESIZE], path[64], buf[8]; + ssize_t len; + int fd; + + if (!if_indextoname(ifindex, ifname)) + return -1; + + snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) + return -1; + if (len >= (ssize_t)sizeof(buf)) + return -1; + buf[len] = '\0'; + + return strtol(buf, NULL, 0); +} + +static int get_kernel_version(void) +{ + int version, subversion, patchlevel; + struct utsname utsn; + + /* Return 0 on failure, and attempt to probe with empty kversion */ + if (uname(&utsn)) + return 0; + + if (sscanf(utsn.release, "%d.%d.%d", + &version, &subversion, &patchlevel) != 3) + return 0; + + return (version << 16) + (subversion << 8) + patchlevel; +} + +static void +probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, + size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) +{ + struct bpf_load_program_attr xattr = {}; + int fd; + + switch (prog_type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + break; + case BPF_PROG_TYPE_KPROBE: + xattr.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_LIRC_MODE2: + case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: + default: + break; + } + + xattr.prog_type = prog_type; + xattr.insns = insns; + xattr.insns_cnt = insns_cnt; + xattr.license = "GPL"; + xattr.prog_ifindex = ifindex; + + fd = bpf_load_program_xattr(&xattr, buf, buf_len); + if (fd >= 0) + close(fd); +} + +bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN() + }; + + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) + /* nfp returns -EINVAL on exit(0) with TC offload */ + insns[0].imm = 2; + + errno = 0; + probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + + return errno != EINVAL && errno != EOPNOTSUPP; +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ + int key_size, value_size, max_entries, map_flags; + struct bpf_create_map_attr attr = {}; + int fd = -1, fd_inner; + + key_size = sizeof(__u32); + value_size = sizeof(__u32); + max_entries = 1; + map_flags = 0; + + switch (map_type) { + case BPF_MAP_TYPE_STACK_TRACE: + value_size = sizeof(__u64); + break; + case BPF_MAP_TYPE_LPM_TRIE: + key_size = sizeof(__u64); + value_size = sizeof(__u64); + map_flags = BPF_F_NO_PREALLOC; + break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + key_size = sizeof(struct bpf_cgroup_storage_key); + value_size = sizeof(__u64); + max_entries = 0; + break; + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + key_size = 0; + break; + case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + default: + break; + } + + if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + /* TODO: probe for device, once libbpf has a function to create + * map-in-map for offload + */ + if (ifindex) + return false; + + fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(__u32), sizeof(__u32), 1, 0); + if (fd_inner < 0) + return false; + fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), + fd_inner, 1, 0); + close(fd_inner); + } else { + /* Note: No other restriction on map type probes for offload */ + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + attr.map_ifindex = ifindex; + + fd = bpf_create_map_xattr(&attr); + } + if (fd >= 0) + close(fd); + + return fd >= 0; +} + +bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, + __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_EMIT_CALL(id), + BPF_EXIT_INSN() + }; + char buf[4096] = {}; + bool res; + + probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), + ifindex); + res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); + + if (ifindex) { + switch (get_vendor_id(ifindex)) { + case 0x19ee: /* Netronome specific */ + res = res && !grep(buf, "not supported by FW") && + !grep(buf, "unsupported function id"); + break; + default: + break; + } + } + + return res; +} diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h new file mode 100644 index 000000000000..81ecda0cb9c9 --- /dev/null +++ b/tools/lib/bpf/libbpf_util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __LIBBPF_LIBBPF_UTIL_H +#define __LIBBPF_LIBBPF_UTIL_H + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 0ce67aea8f3b..ce3ec81b71c0 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -21,6 +21,12 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); +struct xdp_id_md { + int ifindex; + __u32 flags; + __u32 id; +}; + int libbpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; @@ -196,6 +202,85 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, return dump_link_nlmsg(cookie, ifi, tb); } +static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) +{ + if (mode != XDP_ATTACHED_MULTI) + return IFLA_XDP_PROG_ID; + if (flags & XDP_FLAGS_DRV_MODE) + return IFLA_XDP_DRV_PROG_ID; + if (flags & XDP_FLAGS_HW_MODE) + return IFLA_XDP_HW_PROG_ID; + if (flags & XDP_FLAGS_SKB_MODE) + return IFLA_XDP_SKB_PROG_ID; + + return IFLA_XDP_UNSPEC; +} + +static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; + struct xdp_id_md *xdp_id = cookie; + struct ifinfomsg *ifinfo = msg; + unsigned char mode, xdp_attr; + int ret; + + if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) + return 0; + + if (!tb[IFLA_XDP]) + return 0; + + ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); + if (ret) + return ret; + + if (!xdp_tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); + if (!xdp_attr || !xdp_tb[xdp_attr]) + return 0; + + xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_id_md xdp_id = {}; + int sock, ret; + __u32 nl_pid; + __u32 mask; + + if (flags & ~XDP_FLAGS_MASK) + return -EINVAL; + + /* Check whether the single {HW,DRV,SKB} mode is set */ + flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); + mask = flags - 1; + if (flags && flags & mask) + return -EINVAL; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + xdp_id.ifindex = ifindex; + xdp_id.flags = flags; + + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); + if (!ret) + *prog_id = xdp_id.id; + + close(sock); + return ret; +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp new file mode 100644 index 000000000000..fc134873bb6d --- /dev/null +++ b/tools/lib/bpf/test_libbpf.cpp @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" + +/* do nothing, just make sure we can link successfully */ + +int main(int argc, char *argv[]) +{ + /* libbpf.h */ + libbpf_set_print(NULL); + + /* bpf.h */ + bpf_prog_get_fd_by_id(0); + + /* btf.h */ + btf__new(NULL, 0); +} diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c new file mode 100644 index 000000000000..f98ac82c9aea --- /dev/null +++ b/tools/lib/bpf/xsk.c @@ -0,0 +1,723 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <asm/barrier.h> +#include <linux/compiler.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_xdp.h> +#include <linux/sockios.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" +#include "xsk.h" + +#ifndef SOL_XDP + #define SOL_XDP 283 +#endif + +#ifndef AF_XDP + #define AF_XDP 44 +#endif + +#ifndef PF_XDP + #define PF_XDP AF_XDP +#endif + +struct xsk_umem { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + char *umem_area; + struct xsk_umem_config config; + int fd; + int refcount; +}; + +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + __u64 outstanding_tx; + struct xsk_umem *umem; + struct xsk_socket_config config; + int fd; + int xsks_map; + int ifindex; + int prog_fd; + int qidconf_map_fd; + int xsks_map_fd; + __u32 queue_id; + char ifname[IFNAMSIZ]; +}; + +struct xsk_nl_info { + bool xdp_prog_attached; + int ifindex; + int fd; +}; + +/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit. + * Unfortunately, it is not part of glibc. + */ +static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags, + int fd, __u64 offset) +{ +#ifdef __NR_mmap2 + unsigned int page_shift = __builtin_ffs(getpagesize()) - 1; + long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd, + (off_t)(offset >> page_shift)); + + return (void *)ret; +#else + return mmap(addr, length, prot, flags, fd, offset); +#endif +} + +int xsk_umem__fd(const struct xsk_umem *umem) +{ + return umem ? umem->fd : -EINVAL; +} + +int xsk_socket__fd(const struct xsk_socket *xsk) +{ + return xsk ? xsk->fd : -EINVAL; +} + +static bool xsk_page_aligned(void *buffer) +{ + unsigned long addr = (unsigned long)buffer; + + return !(addr & (getpagesize() - 1)); +} + +static void xsk_set_umem_config(struct xsk_umem_config *cfg, + const struct xsk_umem_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + return; + } + + cfg->fill_size = usr_cfg->fill_size; + cfg->comp_size = usr_cfg->comp_size; + cfg->frame_size = usr_cfg->frame_size; + cfg->frame_headroom = usr_cfg->frame_headroom; +} + +static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->libbpf_flags = 0; + cfg->xdp_flags = 0; + cfg->bind_flags = 0; + return; + } + + cfg->rx_size = usr_cfg->rx_size; + cfg->tx_size = usr_cfg->tx_size; + cfg->libbpf_flags = usr_cfg->libbpf_flags; + cfg->xdp_flags = usr_cfg->xdp_flags; + cfg->bind_flags = usr_cfg->bind_flags; +} + +int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, + struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xdp_mmap_offsets off; + struct xdp_umem_reg mr; + struct xsk_umem *umem; + socklen_t optlen; + void *map; + int err; + + if (!umem_area || !umem_ptr || !fill || !comp) + return -EFAULT; + if (!size && !xsk_page_aligned(umem_area)) + return -EINVAL; + + umem = calloc(1, sizeof(*umem)); + if (!umem) + return -ENOMEM; + + umem->fd = socket(AF_XDP, SOCK_RAW, 0); + if (umem->fd < 0) { + err = -errno; + goto out_umem_alloc; + } + + umem->umem_area = umem_area; + xsk_set_umem_config(&umem->config, usr_config); + + mr.addr = (uintptr_t)umem_area; + mr.len = size; + mr.chunk_size = umem->config.frame_size; + mr.headroom = umem->config.frame_headroom; + + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) { + err = -errno; + goto out_socket; + } + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + map = xsk_mmap(NULL, off.fr.desc + + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + umem->fd, XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + umem->fill = fill; + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = xsk_mmap(NULL, + off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + umem->comp = comp; + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->ring = map + off.cr.desc; + + *umem_ptr = umem; + return 0; + +out_mmap: + munmap(umem->fill, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); +out_socket: + close(umem->fd); +out_umem_alloc: + free(umem); + return err; +} + +static int xsk_load_xdp_prog(struct xsk_socket *xsk) +{ + char bpf_log_buf[BPF_LOG_BUF_SIZE]; + int err, prog_fd; + + /* This is the C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * int *qidconf, index = ctx->rx_queue_index; + * + * // A set entry here means that the correspnding queue_id + * // has an active AF_XDP socket bound to it. + * qidconf = bpf_map_lookup_elem(&qidconf_map, &index); + * if (!qidconf) + * return XDP_ABORTED; + * + * if (*qidconf) + * return bpf_redirect_map(&xsks_map, index, 0); + * + * return XDP_PASS; + * } + */ + struct bpf_insn prog[] = { + /* r1 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r1 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_0, 0), + /* if r1 == 0 goto +8 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + BPF_MOV32_IMM(BPF_REG_0, 2), + /* r1 = *(u32 *)(r1 + 0) */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* if r1 == 0 goto +5 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* r2 = *(u32 *)(r10 - 4) */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), + BPF_MOV32_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* The jumps are to this instruction */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf, + BPF_LOG_BUF_SIZE); + if (prog_fd < 0) { + pr_warning("BPF log buffer:\n%s", bpf_log_buf); + return prog_fd; + } + + err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); + if (err) { + close(prog_fd); + return err; + } + + xsk->prog_fd = prog_fd; + return 0; +} + +static int xsk_get_max_queues(struct xsk_socket *xsk) +{ + struct ethtool_channels channels; + struct ifreq ifr; + int fd, err, ret; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -errno; + + channels.cmd = ETHTOOL_GCHANNELS; + ifr.ifr_data = (void *)&channels; + strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ); + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; + goto out; + } + + if (channels.max_combined == 0 || errno == EOPNOTSUPP) + /* If the device says it has no channels, then all traffic + * is sent to a single stream, so max queues = 1. + */ + ret = 1; + else + ret = channels.max_combined; + +out: + close(fd); + return ret; +} + +static int xsk_create_bpf_maps(struct xsk_socket *xsk) +{ + int max_queues; + int fd; + + max_queues = xsk_get_max_queues(xsk); + if (max_queues < 0) + return max_queues; + + fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) + return fd; + xsk->qidconf_map_fd = fd; + + fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) { + close(xsk->qidconf_map_fd); + return fd; + } + xsk->xsks_map_fd = fd; + + return 0; +} + +static void xsk_delete_bpf_maps(struct xsk_socket *xsk) +{ + close(xsk->qidconf_map_fd); + close(xsk->xsks_map_fd); +} + +static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, + int xsks_value) +{ + bool qidconf_map_updated = false, xsks_map_updated = false; + struct bpf_prog_info prog_info = {}; + __u32 prog_len = sizeof(prog_info); + struct bpf_map_info map_info; + __u32 map_len = sizeof(map_info); + __u32 *map_ids; + int reset_value = 0; + __u32 num_maps; + unsigned int i; + int err; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + return err; + + num_maps = prog_info.nr_map_ids; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); + if (!map_ids) + return -ENOMEM; + + memset(&prog_info, 0, prog_len); + prog_info.nr_map_ids = num_maps; + prog_info.map_ids = (__u64)(unsigned long)map_ids; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + goto out_map_ids; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + int fd; + + fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) { + err = -errno; + goto out_maps; + } + + err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (err) + goto out_maps; + + if (!strcmp(map_info.name, "qidconf_map")) { + err = bpf_map_update_elem(fd, &xsk->queue_id, + &qidconf_value, 0); + if (err) + goto out_maps; + qidconf_map_updated = true; + xsk->qidconf_map_fd = fd; + } else if (!strcmp(map_info.name, "xsks_map")) { + err = bpf_map_update_elem(fd, &xsk->queue_id, + &xsks_value, 0); + if (err) + goto out_maps; + xsks_map_updated = true; + xsk->xsks_map_fd = fd; + } + + if (qidconf_map_updated && xsks_map_updated) + break; + } + + if (!(qidconf_map_updated && xsks_map_updated)) { + err = -ENOENT; + goto out_maps; + } + + err = 0; + goto out_success; + +out_maps: + if (qidconf_map_updated) + (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, + &reset_value, 0); + if (xsks_map_updated) + (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, + &reset_value, 0); +out_success: + if (qidconf_map_updated) + close(xsk->qidconf_map_fd); + if (xsks_map_updated) + close(xsk->xsks_map_fd); +out_map_ids: + free(map_ids); + return err; +} + +static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +{ + bool prog_attached = false; + __u32 prog_id = 0; + int err; + + err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + xsk->config.xdp_flags); + if (err) + return err; + + if (!prog_id) { + prog_attached = true; + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) + goto out_maps; + } else { + xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + } + + err = xsk_update_bpf_maps(xsk, true, xsk->fd); + if (err) + goto out_load; + + return 0; + +out_load: + if (prog_attached) + close(xsk->prog_fd); +out_maps: + if (prog_attached) + xsk_delete_bpf_maps(xsk); + return err; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + socklen_t optlen; + void *map; + int err; + + if (!umem || !xsk_ptr || !rx || !tx) + return -EFAULT; + + if (umem->refcount) { + pr_warning("Error: shared umems not supported by libbpf.\n"); + return -EBUSY; + } + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } else { + xsk->fd = umem->fd; + } + + xsk->outstanding_tx = 0; + xsk->queue_id = queue_id; + xsk->umem = umem; + xsk->ifindex = if_nametoindex(ifname); + if (!xsk->ifindex) { + err = -errno; + goto out_socket; + } + strncpy(xsk->ifname, ifname, IFNAMSIZ); + + xsk_set_xdp_socket_config(&xsk->config, usr_config); + + if (rx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + if (tx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + if (rx) { + map = xsk_mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = map + off.rx.producer; + rx->consumer = map + off.rx.consumer; + rx->ring = map + off.rx.desc; + } + xsk->rx = rx; + + if (tx) { + map = xsk_mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = map + off.tx.producer; + tx->consumer = map + off.tx.consumer; + tx->ring = map + off.tx.desc; + tx->cached_cons = xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = xsk->ifindex; + sxdp.sxdp_queue_id = xsk->queue_id; + sxdp.sxdp_flags = xsk->config.bind_flags; + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = xsk_setup_xdp_prog(xsk); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + return 0; + +out_mmap_tx: + if (tx) + munmap(xsk->tx, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(xsk->rx, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + munmap(umem->fill->ring, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp->ring, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!xsk) + return; + + (void)xsk_update_bpf_maps(xsk, 0, 0); + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + if (xsk->rx) + munmap(xsk->rx->ring, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); + if (xsk->tx) + munmap(xsk->tx->ring, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); + } + + xsk->umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != xsk->umem->fd) + close(xsk->fd); + free(xsk); +} diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h new file mode 100644 index 000000000000..a497f00e2962 --- /dev/null +++ b/tools/lib/bpf/xsk.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#ifndef __LIBBPF_XSK_H +#define __LIBBPF_XSK_H + +#include <stdio.h> +#include <stdint.h> +#include <linux/if_xdp.h> + +#include "libbpf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Do not access these members directly. Use the functions below. */ +#define DEFINE_XSK_RING(name) \ +struct name { \ + __u32 cached_prod; \ + __u32 cached_cons; \ + __u32 mask; \ + __u32 size; \ + __u32 *producer; \ + __u32 *consumer; \ + void *ring; \ +} + +DEFINE_XSK_RING(xsk_ring_prod); +DEFINE_XSK_RING(xsk_ring_cons); + +struct xsk_umem; +struct xsk_socket; + +static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, + __u32 idx) +{ + __u64 *addrs = (__u64 *)fill->ring; + + return &addrs[idx & fill->mask]; +} + +static inline const __u64 * +xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) +{ + const __u64 *addrs = (const __u64 *)comp->ring; + + return &addrs[idx & comp->mask]; +} + +static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, + __u32 idx) +{ + struct xdp_desc *descs = (struct xdp_desc *)tx->ring; + + return &descs[idx & tx->mask]; +} + +static inline const struct xdp_desc * +xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) +{ + const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; + + return &descs[idx & rx->mask]; +} + +static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) +{ + __u32 free_entries = r->cached_cons - r->cached_prod; + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer. + * cached_cons is r->size bigger than the real consumer pointer so + * that this addition can be avoided in the more frequently + * executed code that computs free_entries in the beginning of + * this function. Without this optimization it whould have been + * free_entries = r->cached_prod - r->cached_cons + r->size. + */ + r->cached_cons = *r->consumer + r->size; + + return r->cached_cons - r->cached_prod; +} + +static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) +{ + __u32 entries = r->cached_prod - r->cached_cons; + + if (entries == 0) { + r->cached_prod = *r->producer; + entries = r->cached_prod - r->cached_cons; + } + + return (entries > nb) ? nb : entries; +} + +static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, + size_t nb, __u32 *idx) +{ + if (unlikely(xsk_prod_nb_free(prod, nb) < nb)) + return 0; + + *idx = prod->cached_prod; + prod->cached_prod += nb; + + return nb; +} + +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) +{ + /* Make sure everything has been written to the ring before signalling + * this to the kernel. + */ + smp_wmb(); + + *prod->producer += nb; +} + +static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, + size_t nb, __u32 *idx) +{ + size_t entries = xsk_cons_nb_avail(cons, nb); + + if (likely(entries > 0)) { + /* Make sure we do not speculatively read the data before + * we have received the packet buffers from the ring. + */ + smp_rmb(); + + *idx = cons->cached_cons; + cons->cached_cons += entries; + } + + return entries; +} + +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) +{ + *cons->consumer += nb; +} + +static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) +{ + return &((char *)umem_area)[addr]; +} + +LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); + +#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 +#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) +#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 + +struct xsk_umem_config { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +/* Flags for the libbpf_flags field. */ +#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) + +struct xsk_socket_config { + __u32 rx_size; + __u32 tx_size; + __u32 libbpf_flags; + __u32 xdp_flags; + __u16 bind_flags; +}; + +/* Set config to NULL to get the default configuration. */ +LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); + +/* Returns 0 for success and -EBUSY if the umem is still in use. */ +LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_XSK_H */ diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h index 8862da80995a..a81d91d4fc78 100644 --- a/tools/lib/lockdep/include/liblockdep/common.h +++ b/tools/lib/lockdep/include/liblockdep/common.h @@ -44,6 +44,9 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, struct lockdep_map *nest_lock, unsigned long ip); void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); +void lockdep_reset_lock(struct lockdep_map *lock); +void lockdep_register_key(struct lock_class_key *key); +void lockdep_unregister_key(struct lock_class_key *key); extern void debug_check_no_locks_freed(const void *from, unsigned long len); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h index a80ac39f966e..783dd0df06f9 100644 --- a/tools/lib/lockdep/include/liblockdep/mutex.h +++ b/tools/lib/lockdep/include/liblockdep/mutex.h @@ -7,6 +7,7 @@ struct liblockdep_pthread_mutex { pthread_mutex_t mutex; + struct lock_class_key key; struct lockdep_map dep_map; }; @@ -27,11 +28,10 @@ static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, return pthread_mutex_init(&lock->mutex, __mutexattr); } -#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ -({ \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key, (mutexattr)); \ +#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ +({ \ + lockdep_register_key(&(mutex)->key); \ + __mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr)); \ }) static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) @@ -54,6 +54,8 @@ static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *l static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock) { + lockdep_reset_lock(&lock->dep_map); + lockdep_unregister_key(&lock->key); return pthread_mutex_destroy(&lock->mutex); } diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h index a96c3bf0fef1..365762e3a1ea 100644 --- a/tools/lib/lockdep/include/liblockdep/rwlock.h +++ b/tools/lib/lockdep/include/liblockdep/rwlock.h @@ -60,10 +60,10 @@ static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_ return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0; } -static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock) +static inline int liblockdep_pthread_rwlock_trywrlock(liblockdep_pthread_rwlock_t *lock) { lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_); - return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0; + return pthread_rwlock_trywrlock(&lock->rwlock) == 0 ? 1 : 0; } static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock) @@ -79,7 +79,7 @@ static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock) #define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock #define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock #define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock -#define pthread_rwlock_trywlock liblockdep_pthread_rwlock_trywlock +#define pthread_rwlock_trywrlock liblockdep_pthread_rwlock_trywrlock #define pthread_rwlock_destroy liblockdep_rwlock_destroy #endif diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c index 6002fcf2f9bc..348a9d0fb766 100644 --- a/tools/lib/lockdep/lockdep.c +++ b/tools/lib/lockdep/lockdep.c @@ -15,6 +15,11 @@ u32 prandom_u32(void) abort(); } +void print_irqtrace_events(struct task_struct *curr) +{ + abort(); +} + static struct new_utsname *init_utsname(void) { static struct new_utsname n = (struct new_utsname) { diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh index 2e570a188f16..11f425662b43 100755 --- a/tools/lib/lockdep/run_tests.sh +++ b/tools/lib/lockdep/run_tests.sh @@ -1,32 +1,47 @@ #! /bin/bash # SPDX-License-Identifier: GPL-2.0 -make &> /dev/null +if ! make >/dev/null; then + echo "Building liblockdep failed." + echo "FAILED!" + exit 1 +fi -for i in `ls tests/*.c`; do +find tests -name '*.c' | sort | while read -r i; do testname=$(basename "$i" .c) - gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null echo -ne "$testname... " - if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then + if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP && + timeout 1 "tests/$testname" 2>&1 | /bin/bash "tests/${testname}.sh"; then echo "PASSED!" else echo "FAILED!" fi - if [ -f "tests/$testname" ]; then - rm tests/$testname - fi + rm -f "tests/$testname" done -for i in `ls tests/*.c`; do +find tests -name '*.c' | sort | while read -r i; do testname=$(basename "$i" .c) - gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null echo -ne "(PRELOAD) $testname... " - if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then + if gcc -o "tests/$testname" -pthread -Iinclude "$i" && + timeout 1 ./lockdep "tests/$testname" 2>&1 | + /bin/bash "tests/${testname}.sh"; then echo "PASSED!" else echo "FAILED!" fi - if [ -f "tests/$testname" ]; then - rm tests/$testname + rm -f "tests/$testname" +done + +find tests -name '*.c' | sort | while read -r i; do + testname=$(basename "$i" .c) + echo -ne "(PRELOAD + Valgrind) $testname... " + if gcc -o "tests/$testname" -pthread -Iinclude "$i" && + { timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } && + /bin/bash "tests/${testname}.sh" < "tests/${testname}.vg.out" && + ! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then + echo "PASSED!" + else + echo "FAILED!" fi + rm -f "tests/$testname" done diff --git a/tools/lib/lockdep/tests/AA.sh b/tools/lib/lockdep/tests/AA.sh new file mode 100644 index 000000000000..f39b32865074 --- /dev/null +++ b/tools/lib/lockdep/tests/AA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible recursive locking detected' diff --git a/tools/lib/lockdep/tests/ABA.sh b/tools/lib/lockdep/tests/ABA.sh new file mode 100644 index 000000000000..f39b32865074 --- /dev/null +++ b/tools/lib/lockdep/tests/ABA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible recursive locking detected' diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c index 1460afd33d71..543789bc3e37 100644 --- a/tools/lib/lockdep/tests/ABBA.c +++ b/tools/lib/lockdep/tests/ABBA.c @@ -11,4 +11,16 @@ void main(void) LOCK_UNLOCK_2(a, b); LOCK_UNLOCK_2(b, a); + + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, a); + + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/ABBA.sh b/tools/lib/lockdep/tests/ABBA.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABBA_2threads.sh b/tools/lib/lockdep/tests/ABBA_2threads.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBA_2threads.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c index a54c1b2af118..48446129d496 100644 --- a/tools/lib/lockdep/tests/ABBCCA.c +++ b/tools/lib/lockdep/tests/ABBCCA.c @@ -13,4 +13,8 @@ void main(void) LOCK_UNLOCK_2(a, b); LOCK_UNLOCK_2(b, c); LOCK_UNLOCK_2(c, a); + + pthread_mutex_destroy(&c); + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/ABBCCA.sh b/tools/lib/lockdep/tests/ABBCCA.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBCCA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c index aa5d194e8869..3570bf7b3804 100644 --- a/tools/lib/lockdep/tests/ABBCCDDA.c +++ b/tools/lib/lockdep/tests/ABBCCDDA.c @@ -15,4 +15,9 @@ void main(void) LOCK_UNLOCK_2(b, c); LOCK_UNLOCK_2(c, d); LOCK_UNLOCK_2(d, a); + + pthread_mutex_destroy(&d); + pthread_mutex_destroy(&c); + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/ABBCCDDA.sh b/tools/lib/lockdep/tests/ABBCCDDA.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABBCCDDA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c index b54a08e60416..a1c4659894cd 100644 --- a/tools/lib/lockdep/tests/ABCABC.c +++ b/tools/lib/lockdep/tests/ABCABC.c @@ -13,4 +13,8 @@ void main(void) LOCK_UNLOCK_2(a, b); LOCK_UNLOCK_2(c, a); LOCK_UNLOCK_2(b, c); + + pthread_mutex_destroy(&c); + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/ABCABC.sh b/tools/lib/lockdep/tests/ABCABC.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABCABC.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c index a56742250d86..335af1c90ab5 100644 --- a/tools/lib/lockdep/tests/ABCDBCDA.c +++ b/tools/lib/lockdep/tests/ABCDBCDA.c @@ -15,4 +15,9 @@ void main(void) LOCK_UNLOCK_2(c, d); LOCK_UNLOCK_2(b, c); LOCK_UNLOCK_2(d, a); + + pthread_mutex_destroy(&d); + pthread_mutex_destroy(&c); + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/ABCDBCDA.sh b/tools/lib/lockdep/tests/ABCDBCDA.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABCDBCDA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c index 238a3353f3c3..3c5972863049 100644 --- a/tools/lib/lockdep/tests/ABCDBDDA.c +++ b/tools/lib/lockdep/tests/ABCDBDDA.c @@ -15,4 +15,9 @@ void main(void) LOCK_UNLOCK_2(c, d); LOCK_UNLOCK_2(b, d); LOCK_UNLOCK_2(d, a); + + pthread_mutex_destroy(&d); + pthread_mutex_destroy(&c); + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/ABCDBDDA.sh b/tools/lib/lockdep/tests/ABCDBDDA.sh new file mode 100644 index 000000000000..fc31c607a5a8 --- /dev/null +++ b/tools/lib/lockdep/tests/ABCDBDDA.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible circular locking dependency detected' diff --git a/tools/lib/lockdep/tests/WW.sh b/tools/lib/lockdep/tests/WW.sh new file mode 100644 index 000000000000..f39b32865074 --- /dev/null +++ b/tools/lib/lockdep/tests/WW.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: possible recursive locking detected' diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c index 34cf32f689de..dba25064b50a 100644 --- a/tools/lib/lockdep/tests/unlock_balance.c +++ b/tools/lib/lockdep/tests/unlock_balance.c @@ -10,4 +10,6 @@ void main(void) pthread_mutex_lock(&a); pthread_mutex_unlock(&a); pthread_mutex_unlock(&a); + + pthread_mutex_destroy(&a); } diff --git a/tools/lib/lockdep/tests/unlock_balance.sh b/tools/lib/lockdep/tests/unlock_balance.sh new file mode 100644 index 000000000000..c6e3952303fe --- /dev/null +++ b/tools/lib/lockdep/tests/unlock_balance.sh @@ -0,0 +1,2 @@ +#!/bin/bash +grep -q 'WARNING: bad unlock balance detected' diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 17c2b596f043..904adb70a4f0 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -22,6 +22,7 @@ */ #include <linux/rbtree_augmented.h> +#include <linux/export.h> /* * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree @@ -43,6 +44,30 @@ * parentheses and have some accompanying text comment. */ +/* + * Notes on lockless lookups: + * + * All stores to the tree structure (rb_left and rb_right) must be done using + * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the + * tree structure as seen in program order. + * + * These two requirements will allow lockless iteration of the tree -- not + * correct iteration mind you, tree rotations are not atomic so a lookup might + * miss entire subtrees. + * + * But they do guarantee that any such traversal will only see valid elements + * and that it will indeed complete -- does not get stuck in a loop. + * + * It also guarantees that if the lookup returns an element it is the 'correct' + * one. But not returning an element does _NOT_ mean it's not present. + * + * NOTE: + * + * Stores to __rb_parent_color are not important for simple lookups so those + * are left undone as of now. Nor did I check for loops involving parent + * pointers. + */ + static inline void rb_set_black(struct rb_node *rb) { rb->__rb_parent_color |= RB_BLACK; @@ -70,22 +95,35 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, static __always_inline void __rb_insert(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + if (newleft) + *leftmost = node; + while (true) { /* - * Loop invariant: node is red - * - * If there is a black parent, we are done. - * Otherwise, take some corrective action as we don't - * want a red root or two consecutive red nodes. + * Loop invariant: node is red. */ - if (!parent) { + if (unlikely(!parent)) { + /* + * The inserted node is root. Either this is the + * first node, or we recursed at Case 1 below and + * are no longer violating 4). + */ rb_set_parent_color(node, NULL, RB_BLACK); break; - } else if (rb_is_black(parent)) + } + + /* + * If there is a black parent, we are done. + * Otherwise, take some corrective action as, + * per 4), we don't want a red root or two + * consecutive red nodes. + */ + if(rb_is_black(parent)) break; gparent = rb_red_parent(parent); @@ -94,7 +132,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root, if (parent != tmp) { /* parent == gparent->rb_left */ if (tmp && rb_is_red(tmp)) { /* - * Case 1 - color flips + * Case 1 - node's uncle is red (color flips). * * G g * / \ / \ @@ -117,7 +155,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_right; if (node == tmp) { /* - * Case 2 - left rotate at parent + * Case 2 - node's uncle is black and node is + * the parent's right child (left rotate at parent). * * G G * / \ / \ @@ -128,8 +167,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * This still leaves us in violation of 4), the * continuation into Case 3 will fix that. */ - parent->rb_right = tmp = node->rb_left; - node->rb_left = parent; + tmp = node->rb_left; + WRITE_ONCE(parent->rb_right, tmp); + WRITE_ONCE(node->rb_left, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -140,7 +180,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* - * Case 3 - right rotate at gparent + * Case 3 - node's uncle is black and node is + * the parent's left child (right rotate at gparent). * * G P * / \ / \ @@ -148,8 +189,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * / \ * n U */ - gparent->rb_left = tmp; /* == parent->rb_right */ - parent->rb_right = gparent; + WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */ + WRITE_ONCE(parent->rb_right, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -170,8 +211,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_left; if (node == tmp) { /* Case 2 - right rotate at parent */ - parent->rb_left = tmp = node->rb_right; - node->rb_right = parent; + tmp = node->rb_right; + WRITE_ONCE(parent->rb_left, tmp); + WRITE_ONCE(node->rb_right, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -182,8 +224,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* Case 3 - left rotate at gparent */ - gparent->rb_right = tmp; /* == parent->rb_left */ - parent->rb_left = gparent; + WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */ + WRITE_ONCE(parent->rb_left, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -223,8 +265,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * Sl Sr N Sl */ - parent->rb_right = tmp1 = sibling->rb_left; - sibling->rb_left = parent; + tmp1 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp1); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -268,15 +311,31 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * * (p) (p) * / \ / \ - * N S --> N Sl + * N S --> N sl * / \ \ - * sl Sr s + * sl Sr S * \ * Sr + * + * Note: p might be red, and then both + * p and sl are red after rotation(which + * breaks property 4). This is fixed in + * Case 4 (in __rb_rotate_set_parents() + * which set sl the color of p + * and set p RB_BLACK) + * + * (p) (sl) + * / \ / \ + * N sl --> P S + * \ / \ + * S N Sr + * \ + * Sr */ - sibling->rb_left = tmp1 = tmp2->rb_right; - tmp2->rb_right = sibling; - parent->rb_right = tmp2; + tmp1 = tmp2->rb_right; + WRITE_ONCE(sibling->rb_left, tmp1); + WRITE_ONCE(tmp2->rb_right, sibling); + WRITE_ONCE(parent->rb_right, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -296,8 +355,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * (sl) sr N (sl) */ - parent->rb_right = tmp2 = sibling->rb_left; - sibling->rb_left = parent; + tmp2 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp2); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -309,8 +369,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, sibling = parent->rb_left; if (rb_is_red(sibling)) { /* Case 1 - right rotate at parent */ - parent->rb_left = tmp1 = sibling->rb_right; - sibling->rb_right = parent; + tmp1 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp1); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -334,10 +395,11 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, } break; } - /* Case 3 - right rotate at sibling */ - sibling->rb_right = tmp1 = tmp2->rb_left; - tmp2->rb_left = sibling; - parent->rb_left = tmp2; + /* Case 3 - left rotate at sibling */ + tmp1 = tmp2->rb_left; + WRITE_ONCE(sibling->rb_right, tmp1); + WRITE_ONCE(tmp2->rb_left, sibling); + WRITE_ONCE(parent->rb_left, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -345,9 +407,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, tmp1 = sibling; sibling = tmp2; } - /* Case 4 - left rotate at parent + color flips */ - parent->rb_left = tmp2 = sibling->rb_right; - sibling->rb_right = parent; + /* Case 4 - right rotate at parent + color flips */ + tmp2 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp2); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -378,22 +441,41 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} static const struct rb_augment_callbacks dummy_callbacks = { - dummy_propagate, dummy_copy, dummy_rotate + .propagate = dummy_propagate, + .copy = dummy_copy, + .rotate = dummy_rotate }; void rb_insert_color(struct rb_node *node, struct rb_root *root) { - __rb_insert(node, root, dummy_rotate); + __rb_insert(node, root, false, NULL, dummy_rotate); } void rb_erase(struct rb_node *node, struct rb_root *root) { struct rb_node *rebalance; - rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + rebalance = __rb_erase_augmented(node, root, + NULL, &dummy_callbacks); if (rebalance) ____rb_erase_color(rebalance, root, dummy_rotate); } +void rb_insert_color_cached(struct rb_node *node, + struct rb_root_cached *root, bool leftmost) +{ + __rb_insert(node, &root->rb_root, leftmost, + &root->rb_leftmost, dummy_rotate); +} + +void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate); +} + /* * Augmented rbtree manipulation functions. * @@ -402,9 +484,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) */ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { - __rb_insert(node, root, augment_rotate); + __rb_insert(node, root, newleft, leftmost, augment_rotate); } /* @@ -498,15 +581,24 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, { struct rb_node *parent = rb_parent(victim); + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; + /* Set the surrounding nodes to point to the replacement */ - __rb_change_child(victim, new, parent, root); if (victim->rb_left) rb_set_parent(victim->rb_left, new); if (victim->rb_right) rb_set_parent(victim->rb_right, new); + __rb_change_child(victim, new, parent, root); +} - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; +void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root) +{ + rb_replace_node(victim, new, &root->rb_root); + + if (root->rb_leftmost == victim) + root->rb_leftmost = new; } static struct rb_node *rb_left_deepest_node(const struct rb_node *node) diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 95563b8e1ad7..ed61fb3a46c0 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -36,8 +36,6 @@ endif CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE CFLAGS += -I$(srctree)/tools/include/ -CFLAGS += -I$(srctree)/include/uapi -CFLAGS += -I$(srctree)/include SUBCMD_IN := $(OUTPUT)libsubcmd-in.o diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index cb7154eccbdc..dbb9efbf718a 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -116,6 +116,7 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_INTEGER: case OPTION_UINTEGER: case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: default: break; @@ -166,6 +167,7 @@ static int get_value(struct parse_opt_ctx_t *p, case OPTION_INTEGER: case OPTION_UINTEGER: case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: default: break; @@ -295,6 +297,22 @@ static int get_value(struct parse_opt_ctx_t *p, return opterror(opt, "expects a numerical value", flags); return 0; + case OPTION_ULONG: + if (unset) { + *(unsigned long *)opt->value = 0; + return 0; + } + if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { + *(unsigned long *)opt->value = opt->defval; + return 0; + } + if (get_arg(p, opt, flags, &arg)) + return -1; + *(unsigned long *)opt->value = strtoul(arg, (char **)&s, 10); + if (*s) + return opterror(opt, "expects a numerical value", flags); + return 0; + case OPTION_U64: if (unset) { *(u64 *)opt->value = 0; @@ -703,6 +721,7 @@ static void print_option_help(const struct option *opts, int full) case OPTION_ARGUMENT: break; case OPTION_LONG: + case OPTION_ULONG: case OPTION_U64: case OPTION_INTEGER: case OPTION_UINTEGER: diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h index 92fdbe1519f6..af9def589863 100644 --- a/tools/lib/subcmd/parse-options.h +++ b/tools/lib/subcmd/parse-options.h @@ -25,6 +25,7 @@ enum parse_opt_type { OPTION_STRING, OPTION_INTEGER, OPTION_LONG, + OPTION_ULONG, OPTION_CALLBACK, OPTION_U64, OPTION_UINTEGER, @@ -70,7 +71,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset); * * `argh`:: * token to explain the kind of argument this option wants. Keep it - * homogenous across the repository. + * homogeneous across the repository. * * `help`:: * the short help associated to what the option does. @@ -79,7 +80,7 @@ typedef int parse_opt_cb(const struct option *, const char *arg, int unset); * * `flags`:: * mask of parse_opt_option_flags. - * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) + * PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs) * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs * PARSE_OPT_NONEG: says that this option cannot be negated * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in @@ -133,6 +134,7 @@ struct option { #define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) } #define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) } #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } +#define OPT_ULONG(s, l, v, h) { .type = OPTION_ULONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } #define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) } #define OPT_STRING_OPTARG(s, l, v, a, h, d) \ diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 0b4e833088a4..941761d9923d 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -25,6 +25,7 @@ endef $(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) $(call allow-override,NM,$(CROSS_COMPILE)nm) +$(call allow-override,PKG_CONFIG,pkg-config) EXT = -std=gnu99 INSTALL = install @@ -47,6 +48,8 @@ prefix ?= /usr/local libdir = $(prefix)/$(libdir_relative) man_dir = $(prefix)/share/man man_dir_SQ = '$(subst ','\'',$(man_dir))' +pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \ + --variable pc_path pkg-config | tr ":" " ")) export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ @@ -174,7 +177,7 @@ $(TE_IN): force $(Q)$(MAKE) $(build)=libtraceevent $(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN) - $(QUIET_LINK)$(CC) --shared $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ + $(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@ @ln -sf $(@F) $(OUTPUT)libtraceevent.so @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION) @@ -193,7 +196,7 @@ $(PLUGINS_IN): force $(Q)$(MAKE) $(build)=$(plugin_obj) $(OUTPUT)%.so: $(OUTPUT)%-in.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ + $(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^ define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ @@ -270,7 +273,19 @@ define do_generate_dynamic_list_file fi endef -install_lib: all_cmd install_plugins +PKG_CONFIG_FILE = libtraceevent.pc +define do_install_pkgconfig_file + if [ -n "${pkgconfig_dir}" ]; then \ + cp -f ${PKG_CONFIG_FILE}.template ${PKG_CONFIG_FILE}; \ + sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \ + sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \ + $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \ + else \ + (echo Failed to locate pkg-config directory) 1>&2; \ + fi +endef + +install_lib: all_cmd install_plugins install_headers install_pkgconfig $(call QUIET_INSTALL, $(LIB_TARGET)) \ $(call do_install_mkdir,$(libdir_SQ)); \ cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ) @@ -279,18 +294,24 @@ install_plugins: $(PLUGINS) $(call QUIET_INSTALL, trace_plugins) \ $(call do_install_plugins, $(PLUGINS)) +install_pkgconfig: + $(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \ + $(call do_install_pkgconfig_file,$(prefix)) + install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,event-parse.h,$(prefix)/include/traceevent,644); \ $(call do_install,event-utils.h,$(prefix)/include/traceevent,644); \ + $(call do_install,trace-seq.h,$(prefix)/include/traceevent,644); \ $(call do_install,kbuffer.h,$(prefix)/include/traceevent,644) install: install_lib clean: $(call QUIET_CLEAN, libtraceevent) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ - $(RM) TRACEEVENT-CFLAGS tags TAGS + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \ + $(RM) TRACEEVENT-CFLAGS tags TAGS; \ + $(RM) $(PKG_CONFIG_FILE) PHONY += force plugins force: diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 61f7149085ee..d463761a58f4 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -15,7 +15,7 @@ * This returns pointer to the first element of the events array * If @tep is NULL, NULL is returned. */ -struct tep_event_format *tep_get_first_event(struct tep_handle *tep) +struct tep_event *tep_get_first_event(struct tep_handle *tep) { if (tep && tep->events) return tep->events[0]; @@ -51,7 +51,7 @@ void tep_set_flag(struct tep_handle *tep, int flag) tep->flags |= flag; } -unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data) +unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data) { unsigned short swap; @@ -64,7 +64,7 @@ unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data) return swap; } -unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data) +unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data) { unsigned int swap; @@ -80,7 +80,7 @@ unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data) } unsigned long long -__tep_data2host8(struct tep_handle *pevent, unsigned long long data) +tep_data2host8(struct tep_handle *pevent, unsigned long long data) { unsigned long long swap; @@ -194,13 +194,13 @@ void tep_set_page_size(struct tep_handle *pevent, int _page_size) } /** - * tep_is_file_bigendian - get if the file is in big endian order + * tep_file_bigendian - get if the file is in big endian order * @pevent: a handle to the tep_handle * * This returns if the file is in big endian order * If @pevent is NULL, 0 is returned. */ -int tep_is_file_bigendian(struct tep_handle *pevent) +int tep_file_bigendian(struct tep_handle *pevent) { if(pevent) return pevent->file_bigendian; diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index b9bddde577f8..35833ee32d6c 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -7,7 +7,7 @@ #ifndef _PARSE_EVENTS_INT_H #define _PARSE_EVENTS_INT_H -struct cmdline; +struct tep_cmdline; struct cmdline_list; struct func_map; struct func_list; @@ -36,7 +36,7 @@ struct tep_handle { int long_size; int page_size; - struct cmdline *cmdlines; + struct tep_cmdline *cmdlines; struct cmdline_list *cmdlist; int cmdline_count; @@ -50,9 +50,9 @@ struct tep_handle { unsigned int printk_count; - struct tep_event_format **events; + struct tep_event **events; int nr_events; - struct tep_event_format **sort_events; + struct tep_event **sort_events; enum tep_event_sort_type last_type; int type_offset; @@ -84,9 +84,16 @@ struct tep_handle { struct tep_function_handler *func_handlers; /* cache */ - struct tep_event_format *last_event; + struct tep_event *last_event; char *trace_clock; }; +void tep_free_event(struct tep_event *event); +void tep_free_format_field(struct tep_format_field *field); + +unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data); +unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data); +unsigned long long tep_data2host8(struct tep_handle *pevent, unsigned long long data); + #endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 3692f29fee46..abd4fa5d3088 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -96,7 +96,7 @@ struct tep_function_handler { static unsigned long long process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event_format *event, struct tep_print_arg *arg); + struct tep_event *event, struct tep_print_arg *arg); static void free_func_handle(struct tep_function_handler *func); @@ -124,15 +124,15 @@ struct tep_print_arg *alloc_arg(void) return calloc(1, sizeof(struct tep_print_arg)); } -struct cmdline { +struct tep_cmdline { char *comm; int pid; }; static int cmdline_cmp(const void *a, const void *b) { - const struct cmdline *ca = a; - const struct cmdline *cb = b; + const struct tep_cmdline *ca = a; + const struct tep_cmdline *cb = b; if (ca->pid < cb->pid) return -1; @@ -152,7 +152,7 @@ static int cmdline_init(struct tep_handle *pevent) { struct cmdline_list *cmdlist = pevent->cmdlist; struct cmdline_list *item; - struct cmdline *cmdlines; + struct tep_cmdline *cmdlines; int i; cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count); @@ -179,8 +179,8 @@ static int cmdline_init(struct tep_handle *pevent) static const char *find_cmdline(struct tep_handle *pevent, int pid) { - const struct cmdline *comm; - struct cmdline key; + const struct tep_cmdline *comm; + struct tep_cmdline key; if (!pid) return "<idle>"; @@ -208,8 +208,8 @@ static const char *find_cmdline(struct tep_handle *pevent, int pid) */ int tep_pid_is_registered(struct tep_handle *pevent, int pid) { - const struct cmdline *comm; - struct cmdline key; + const struct tep_cmdline *comm; + struct tep_cmdline key; if (!pid) return 1; @@ -232,11 +232,13 @@ int tep_pid_is_registered(struct tep_handle *pevent, int pid) * we must add this pid. This is much slower than when cmdlines * are added before the array is initialized. */ -static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid) +static int add_new_comm(struct tep_handle *pevent, + const char *comm, int pid, bool override) { - struct cmdline *cmdlines = pevent->cmdlines; - const struct cmdline *cmdline; - struct cmdline key; + struct tep_cmdline *cmdlines = pevent->cmdlines; + struct tep_cmdline *cmdline; + struct tep_cmdline key; + char *new_comm; if (!pid) return 0; @@ -247,8 +249,19 @@ static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid) cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, sizeof(*pevent->cmdlines), cmdline_cmp); if (cmdline) { - errno = EEXIST; - return -1; + if (!override) { + errno = EEXIST; + return -1; + } + new_comm = strdup(comm); + if (!new_comm) { + errno = ENOMEM; + return -1; + } + free(cmdline->comm); + cmdline->comm = new_comm; + + return 0; } cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1)); @@ -275,21 +288,13 @@ static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid) return 0; } -/** - * tep_register_comm - register a pid / comm mapping - * @pevent: handle for the pevent - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. - */ -int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) +static int _tep_register_comm(struct tep_handle *pevent, + const char *comm, int pid, bool override) { struct cmdline_list *item; if (pevent->cmdlines) - return add_new_comm(pevent, comm, pid); + return add_new_comm(pevent, comm, pid, override); item = malloc(sizeof(*item)); if (!item) @@ -312,6 +317,40 @@ int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) return 0; } +/** + * tep_register_comm - register a pid / comm mapping + * @pevent: handle for the pevent + * @comm: the command line to register + * @pid: the pid to map the command line to + * + * This adds a mapping to search for command line names with + * a given pid. The comm is duplicated. If a command with the same pid + * already exist, -1 is returned and errno is set to EEXIST + */ +int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) +{ + return _tep_register_comm(pevent, comm, pid, false); +} + +/** + * tep_override_comm - register a pid / comm mapping + * @pevent: handle for the pevent + * @comm: the command line to register + * @pid: the pid to map the command line to + * + * This adds a mapping to search for command line names with + * a given pid. The comm is duplicated. If a command with the same pid + * already exist, the command string is udapted with the new one + */ +int tep_override_comm(struct tep_handle *pevent, const char *comm, int pid) +{ + if (!pevent->cmdlines && cmdline_init(pevent)) { + errno = ENOMEM; + return -1; + } + return _tep_register_comm(pevent, comm, pid, true); +} + int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock) { pevent->trace_clock = strdup(trace_clock); @@ -739,16 +778,16 @@ void tep_print_printk(struct tep_handle *pevent) } } -static struct tep_event_format *alloc_event(void) +static struct tep_event *alloc_event(void) { - return calloc(1, sizeof(struct tep_event_format)); + return calloc(1, sizeof(struct tep_event)); } -static int add_event(struct tep_handle *pevent, struct tep_event_format *event) +static int add_event(struct tep_handle *pevent, struct tep_event *event) { int i; - struct tep_event_format **events = realloc(pevent->events, sizeof(event) * - (pevent->nr_events + 1)); + struct tep_event **events = realloc(pevent->events, sizeof(event) * + (pevent->nr_events + 1)); if (!events) return -1; @@ -1145,7 +1184,7 @@ static enum tep_event_type read_token(char **tok) } /** - * tep_read_token - access to utilites to use the pevent parser + * tep_read_token - access to utilities to use the pevent parser * @tok: The token to return * * This will parse tokens from the string given by @@ -1355,7 +1394,7 @@ static unsigned int type_size(const char *name) return 0; } -static int event_read_fields(struct tep_event_format *event, struct tep_format_field **fields) +static int event_read_fields(struct tep_event *event, struct tep_format_field **fields) { struct tep_format_field *field = NULL; enum tep_event_type type; @@ -1642,7 +1681,7 @@ fail_expect: return -1; } -static int event_read_format(struct tep_event_format *event) +static int event_read_format(struct tep_event *event) { char *token; int ret; @@ -1675,11 +1714,11 @@ static int event_read_format(struct tep_event_format *event) } static enum tep_event_type -process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg, +process_arg_token(struct tep_event *event, struct tep_print_arg *arg, char **tok, enum tep_event_type type); static enum tep_event_type -process_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) { enum tep_event_type type; char *token; @@ -1691,14 +1730,14 @@ process_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **to } static enum tep_event_type -process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok); +process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok); /* * For __print_symbolic() and __print_flags, we need to completely * evaluate the first argument, which defines what to print next. */ static enum tep_event_type -process_field_arg(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok) { enum tep_event_type type; @@ -1712,7 +1751,7 @@ process_field_arg(struct tep_event_format *event, struct tep_print_arg *arg, cha } static enum tep_event_type -process_cond(struct tep_event_format *event, struct tep_print_arg *top, char **tok) +process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok) { struct tep_print_arg *arg, *left, *right; enum tep_event_type type; @@ -1768,7 +1807,7 @@ out_free: } static enum tep_event_type -process_array(struct tep_event_format *event, struct tep_print_arg *top, char **tok) +process_array(struct tep_event *event, struct tep_print_arg *top, char **tok) { struct tep_print_arg *arg; enum tep_event_type type; @@ -1870,7 +1909,7 @@ static int set_op_prio(struct tep_print_arg *arg) /* Note, *tok does not get freed, but will most likely be saved */ static enum tep_event_type -process_op(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok) { struct tep_print_arg *left, *right = NULL; enum tep_event_type type; @@ -2071,7 +2110,7 @@ out_free: } static enum tep_event_type -process_entry(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg, +process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, char **tok) { enum tep_event_type type; @@ -2110,7 +2149,7 @@ process_entry(struct tep_event_format *event __maybe_unused, struct tep_print_ar return TEP_EVENT_ERROR; } -static int alloc_and_process_delim(struct tep_event_format *event, char *next_token, +static int alloc_and_process_delim(struct tep_event *event, char *next_token, struct tep_print_arg **print_arg) { struct tep_print_arg *field; @@ -2445,7 +2484,7 @@ static char *arg_eval (struct tep_print_arg *arg) } static enum tep_event_type -process_fields(struct tep_event_format *event, struct tep_print_flag_sym **list, char **tok) +process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok) { enum tep_event_type type; struct tep_print_arg *arg = NULL; @@ -2526,7 +2565,7 @@ out_free: } static enum tep_event_type -process_flags(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok) { struct tep_print_arg *field; enum tep_event_type type; @@ -2579,7 +2618,7 @@ out_free: } static enum tep_event_type -process_symbols(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok) { struct tep_print_arg *field; enum tep_event_type type; @@ -2618,7 +2657,7 @@ out_free: } static enum tep_event_type -process_hex_common(struct tep_event_format *event, struct tep_print_arg *arg, +process_hex_common(struct tep_event *event, struct tep_print_arg *arg, char **tok, enum tep_print_arg_type type) { memset(arg, 0, sizeof(*arg)); @@ -2641,20 +2680,20 @@ out: } static enum tep_event_type -process_hex(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok) { return process_hex_common(event, arg, tok, TEP_PRINT_HEX); } static enum tep_event_type -process_hex_str(struct tep_event_format *event, struct tep_print_arg *arg, +process_hex_str(struct tep_event *event, struct tep_print_arg *arg, char **tok) { return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR); } static enum tep_event_type -process_int_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) { memset(arg, 0, sizeof(*arg)); arg->type = TEP_PRINT_INT_ARRAY; @@ -2682,7 +2721,7 @@ out: } static enum tep_event_type -process_dynamic_array(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok) { struct tep_format_field *field; enum tep_event_type type; @@ -2746,7 +2785,7 @@ process_dynamic_array(struct tep_event_format *event, struct tep_print_arg *arg, } static enum tep_event_type -process_dynamic_array_len(struct tep_event_format *event, struct tep_print_arg *arg, +process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg, char **tok) { struct tep_format_field *field; @@ -2782,7 +2821,7 @@ process_dynamic_array_len(struct tep_event_format *event, struct tep_print_arg * } static enum tep_event_type -process_paren(struct tep_event_format *event, struct tep_print_arg *arg, char **tok) +process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok) { struct tep_print_arg *item_arg; enum tep_event_type type; @@ -2845,7 +2884,7 @@ process_paren(struct tep_event_format *event, struct tep_print_arg *arg, char ** static enum tep_event_type -process_str(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg, +process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, char **tok) { enum tep_event_type type; @@ -2874,7 +2913,7 @@ process_str(struct tep_event_format *event __maybe_unused, struct tep_print_arg } static enum tep_event_type -process_bitmask(struct tep_event_format *event __maybe_unused, struct tep_print_arg *arg, +process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg, char **tok) { enum tep_event_type type; @@ -2935,7 +2974,7 @@ static void remove_func_handler(struct tep_handle *pevent, char *func_name) } static enum tep_event_type -process_func_handler(struct tep_event_format *event, struct tep_function_handler *func, +process_func_handler(struct tep_event *event, struct tep_function_handler *func, struct tep_print_arg *arg, char **tok) { struct tep_print_arg **next_arg; @@ -2993,7 +3032,7 @@ err: } static enum tep_event_type -process_function(struct tep_event_format *event, struct tep_print_arg *arg, +process_function(struct tep_event *event, struct tep_print_arg *arg, char *token, char **tok) { struct tep_function_handler *func; @@ -3049,7 +3088,7 @@ process_function(struct tep_event_format *event, struct tep_print_arg *arg, } static enum tep_event_type -process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg, +process_arg_token(struct tep_event *event, struct tep_print_arg *arg, char **tok, enum tep_event_type type) { char *token; @@ -3137,7 +3176,7 @@ process_arg_token(struct tep_event_format *event, struct tep_print_arg *arg, return type; } -static int event_read_print_args(struct tep_event_format *event, struct tep_print_arg **list) +static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list) { enum tep_event_type type = TEP_EVENT_ERROR; struct tep_print_arg *arg; @@ -3195,7 +3234,7 @@ static int event_read_print_args(struct tep_event_format *event, struct tep_prin return args; } -static int event_read_print(struct tep_event_format *event) +static int event_read_print(struct tep_event *event) { enum tep_event_type type; char *token; @@ -3258,10 +3297,10 @@ static int event_read_print(struct tep_event_format *event) * @name: the name of the common field to return * * Returns a common field from the event by the given @name. - * This only searchs the common fields and not all field. + * This only searches the common fields and not all field. */ struct tep_format_field * -tep_find_common_field(struct tep_event_format *event, const char *name) +tep_find_common_field(struct tep_event *event, const char *name) { struct tep_format_field *format; @@ -3283,7 +3322,7 @@ tep_find_common_field(struct tep_event_format *event, const char *name) * This does not search common fields. */ struct tep_format_field * -tep_find_field(struct tep_event_format *event, const char *name) +tep_find_field(struct tep_event *event, const char *name) { struct tep_format_field *format; @@ -3302,11 +3341,11 @@ tep_find_field(struct tep_event_format *event, const char *name) * @name: the name of the field * * Returns a field by the given @name. - * This searchs the common field names first, then + * This searches the common field names first, then * the non-common ones if a common one was not found. */ struct tep_format_field * -tep_find_any_field(struct tep_event_format *event, const char *name) +tep_find_any_field(struct tep_event *event, const char *name) { struct tep_format_field *format; @@ -3328,15 +3367,18 @@ tep_find_any_field(struct tep_event_format *event, const char *name) unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size) { + unsigned long long val; + switch (size) { case 1: return *(unsigned char *)ptr; case 2: - return tep_data2host2(pevent, ptr); + return tep_data2host2(pevent, *(unsigned short *)ptr); case 4: - return tep_data2host4(pevent, ptr); + return tep_data2host4(pevent, *(unsigned int *)ptr); case 8: - return tep_data2host8(pevent, ptr); + memcpy(&val, (ptr), sizeof(unsigned long long)); + return tep_data2host8(pevent, val); default: /* BUG! */ return 0; @@ -3375,7 +3417,7 @@ int tep_read_number_field(struct tep_format_field *field, const void *data, static int get_common_info(struct tep_handle *pevent, const char *type, int *offset, int *size) { - struct tep_event_format *event; + struct tep_event *event; struct tep_format_field *field; /* @@ -3462,11 +3504,11 @@ static int events_id_cmp(const void *a, const void *b); * * Returns an event that has a given @id. */ -struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id) +struct tep_event *tep_find_event(struct tep_handle *pevent, int id) { - struct tep_event_format **eventptr; - struct tep_event_format key; - struct tep_event_format *pkey = &key; + struct tep_event **eventptr; + struct tep_event key; + struct tep_event *pkey = &key; /* Check cache first */ if (pevent->last_event && pevent->last_event->id == id) @@ -3494,11 +3536,11 @@ struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id) * This returns an event with a given @name and under the system * @sys. If @sys is NULL the first event with @name is returned. */ -struct tep_event_format * +struct tep_event * tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name) { - struct tep_event_format *event; + struct tep_event *event = NULL; int i; if (pevent->last_event && @@ -3523,7 +3565,7 @@ tep_find_event_by_name(struct tep_handle *pevent, } static unsigned long long -eval_num_arg(void *data, int size, struct tep_event_format *event, struct tep_print_arg *arg) +eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { struct tep_handle *pevent = event->pevent; unsigned long long val = 0; @@ -3838,7 +3880,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent, /* * data points to a bit mask of size bytes. * In the kernel, this is an array of long words, thus - * endianess is very important. + * endianness is very important. */ if (pevent->file_bigendian) index = size - (len + 1); @@ -3863,7 +3905,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent, } static void print_str_arg(struct trace_seq *s, void *data, int size, - struct tep_event_format *event, const char *format, + struct tep_event *event, const char *format, int len_arg, struct tep_print_arg *arg) { struct tep_handle *pevent = event->pevent; @@ -4062,7 +4104,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->string.string); arg->string.offset = f->offset; } - str_offset = tep_data2host4(pevent, data + arg->string.offset); + str_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->string.offset)); str_offset &= 0xffff; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; @@ -4080,7 +4122,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->bitmask.bitmask); arg->bitmask.offset = f->offset; } - bitmask_offset = tep_data2host4(pevent, data + arg->bitmask.offset); + bitmask_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->bitmask.offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; print_bitmask_to_seq(pevent, s, format, len_arg, @@ -4118,7 +4160,7 @@ out_warning_field: static unsigned long long process_defined_func(struct trace_seq *s, void *data, int size, - struct tep_event_format *event, struct tep_print_arg *arg) + struct tep_event *event, struct tep_print_arg *arg) { struct tep_function_handler *func_handle = arg->func.func; struct func_params *param; @@ -4213,7 +4255,7 @@ static void free_args(struct tep_print_arg *args) } } -static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event_format *event) +static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) { struct tep_handle *pevent = event->pevent; struct tep_format_field *field, *ip_field; @@ -4221,7 +4263,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s unsigned long long ip, val; char *ptr; void *bptr; - int vsize; + int vsize = 0; field = pevent->bprint_buf_field; ip_field = pevent->bprint_ip_field; @@ -4390,7 +4432,7 @@ out_free: static char * get_bprint_format(void *data, int size __maybe_unused, - struct tep_event_format *event) + struct tep_event *event) { struct tep_handle *pevent = event->pevent; unsigned long long addr; @@ -4425,7 +4467,7 @@ get_bprint_format(void *data, int size __maybe_unused, } static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size, - struct tep_event_format *event, struct tep_print_arg *arg) + struct tep_event *event, struct tep_print_arg *arg) { unsigned char *buf; const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"; @@ -4578,7 +4620,7 @@ static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf) * %pISpc print an IP address based on sockaddr; p adds port. */ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event_format *event, + void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { unsigned char *buf; @@ -4615,7 +4657,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i, } static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event_format *event, + void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { char have_c = 0; @@ -4665,7 +4707,7 @@ static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i, } static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, - void *data, int size, struct tep_event_format *event, + void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { char have_c = 0, have_p = 0; @@ -4747,7 +4789,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i, } static int print_ip_arg(struct trace_seq *s, const char *ptr, - void *data, int size, struct tep_event_format *event, + void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { char i = *ptr; /* 'i' or 'I' */ @@ -4854,7 +4896,7 @@ void tep_print_field(struct trace_seq *s, void *data, } void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event_format *event) + int size __maybe_unused, struct tep_event *event) { struct tep_format_field *field; @@ -4866,7 +4908,7 @@ void tep_print_fields(struct trace_seq *s, void *data, } } -static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event_format *event) +static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) { struct tep_handle *pevent = event->pevent; struct tep_print_fmt *print_fmt = &event->print_fmt; @@ -4881,7 +4923,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e char format[32]; int show_func; int len_as_arg; - int len_arg; + int len_arg = 0; int len; int ls; @@ -4970,6 +5012,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e if (arg->type == TEP_PRINT_BSTRING) { trace_seq_puts(s, arg->string.string); + arg = arg->next; break; } @@ -5146,8 +5189,8 @@ void tep_data_lat_fmt(struct tep_handle *pevent, static int migrate_disable_exists; unsigned int lat_flags; unsigned int pc; - int lock_depth; - int migrate_disable; + int lock_depth = 0; + int migrate_disable = 0; int hardirq; int softirq; void *data = record->data; @@ -5223,18 +5266,6 @@ int tep_data_type(struct tep_handle *pevent, struct tep_record *rec) } /** - * tep_data_event_from_type - find the event by a given type - * @pevent: a handle to the pevent - * @type: the type of the event. - * - * This returns the event form a given @type; - */ -struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type) -{ - return tep_find_event(pevent, type); -} - -/** * tep_data_pid - parse the PID from record * @pevent: a handle to the pevent * @rec: the record to parse @@ -5288,8 +5319,8 @@ const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid) return comm; } -static struct cmdline * -pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next) +static struct tep_cmdline * +pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct tep_cmdline *next) { struct cmdline_list *cmdlist = (struct cmdline_list *)next; @@ -5301,7 +5332,7 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne while (cmdlist && strcmp(cmdlist->comm, comm) != 0) cmdlist = cmdlist->next; - return (struct cmdline *)cmdlist; + return (struct tep_cmdline *)cmdlist; } /** @@ -5313,14 +5344,14 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne * This returns the cmdline structure that holds a pid for a given * comm, or NULL if none found. As there may be more than one pid for * a given comm, the result of this call can be passed back into - * a recurring call in the @next paramater, and then it will find the + * a recurring call in the @next parameter, and then it will find the * next pid. - * Also, it does a linear seach, so it may be slow. + * Also, it does a linear search, so it may be slow. */ -struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, - struct cmdline *next) +struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, + struct tep_cmdline *next) { - struct cmdline *cmdline; + struct tep_cmdline *cmdline; /* * If the cmdlines have not been converted yet, then use @@ -5359,7 +5390,7 @@ struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *co * Returns the pid for a give cmdline. If @cmdline is NULL, then * -1 is returned. */ -int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline) +int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline) { struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; @@ -5387,7 +5418,7 @@ int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline) * This parses the raw @data using the given @event information and * writes the print format into the trace_seq. */ -void tep_event_info(struct trace_seq *s, struct tep_event_format *event, +void tep_event_info(struct trace_seq *s, struct tep_event *event, struct tep_record *record) { int print_pretty = 1; @@ -5409,7 +5440,7 @@ void tep_event_info(struct trace_seq *s, struct tep_event_format *event, static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) { - if (!use_trace_clock) + if (!trace_clock || !use_trace_clock) return true; if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global") @@ -5428,7 +5459,7 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) * Returns the associated event for a given record, or NULL if non is * is found. */ -struct tep_event_format * +struct tep_event * tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) { int type; @@ -5453,7 +5484,7 @@ tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) * Writes the tasks comm, pid and CPU to @s. */ void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, - struct tep_event_format *event, + struct tep_event *event, struct tep_record *record) { void *data = record->data; @@ -5481,7 +5512,7 @@ void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, * Writes the timestamp of the record into @s. */ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, - struct tep_event_format *event, + struct tep_event *event, struct tep_record *record, bool use_trace_clock) { @@ -5531,7 +5562,7 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, * Writes the parsing of the record's data to @s. */ void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, - struct tep_event_format *event, + struct tep_event *event, struct tep_record *record) { static const char *spaces = " "; /* 20 spaces */ @@ -5550,7 +5581,7 @@ void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, struct tep_record *record, bool use_trace_clock) { - struct tep_event_format *event; + struct tep_event *event; event = tep_find_event_by_record(pevent, record); if (!event) { @@ -5572,8 +5603,8 @@ void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, static int events_id_cmp(const void *a, const void *b) { - struct tep_event_format * const * ea = a; - struct tep_event_format * const * eb = b; + struct tep_event * const * ea = a; + struct tep_event * const * eb = b; if ((*ea)->id < (*eb)->id) return -1; @@ -5586,8 +5617,8 @@ static int events_id_cmp(const void *a, const void *b) static int events_name_cmp(const void *a, const void *b) { - struct tep_event_format * const * ea = a; - struct tep_event_format * const * eb = b; + struct tep_event * const * ea = a; + struct tep_event * const * eb = b; int res; res = strcmp((*ea)->name, (*eb)->name); @@ -5603,8 +5634,8 @@ static int events_name_cmp(const void *a, const void *b) static int events_system_cmp(const void *a, const void *b) { - struct tep_event_format * const * ea = a; - struct tep_event_format * const * eb = b; + struct tep_event * const * ea = a; + struct tep_event * const * eb = b; int res; res = strcmp((*ea)->system, (*eb)->system); @@ -5618,9 +5649,9 @@ static int events_system_cmp(const void *a, const void *b) return events_id_cmp(a, b); } -struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type) +struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type) { - struct tep_event_format **events; + struct tep_event **events; int (*sort)(const void *a, const void *b); events = pevent->sort_events; @@ -5703,7 +5734,7 @@ get_event_fields(const char *type, const char *name, * Returns an allocated array of fields. The last item in the array is NULL. * The array must be freed with free(). */ -struct tep_format_field **tep_event_common_fields(struct tep_event_format *event) +struct tep_format_field **tep_event_common_fields(struct tep_event *event) { return get_event_fields("common", event->name, event->format.nr_common, @@ -5717,7 +5748,7 @@ struct tep_format_field **tep_event_common_fields(struct tep_event_format *event * Returns an allocated array of fields. The last item in the array is NULL. * The array must be freed with free(). */ -struct tep_format_field **tep_event_fields(struct tep_event_format *event) +struct tep_format_field **tep_event_fields(struct tep_event *event) { return get_event_fields("event", event->name, event->format.nr_fields, @@ -5959,7 +5990,7 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si return 0; } -static int event_matches(struct tep_event_format *event, +static int event_matches(struct tep_event *event, int id, const char *sys_name, const char *event_name) { @@ -5982,7 +6013,7 @@ static void free_handler(struct event_handler *handle) free(handle); } -static int find_event_handle(struct tep_handle *pevent, struct tep_event_format *event) +static int find_event_handle(struct tep_handle *pevent, struct tep_event *event) { struct event_handler *handle, **next; @@ -6023,11 +6054,11 @@ static int find_event_handle(struct tep_handle *pevent, struct tep_event_format * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno __tep_parse_format(struct tep_event_format **eventp, +enum tep_errno __tep_parse_format(struct tep_event **eventp, struct tep_handle *pevent, const char *buf, unsigned long size, const char *sys) { - struct tep_event_format *event; + struct tep_event *event; int ret; init_input_buf(buf, size); @@ -6132,12 +6163,12 @@ enum tep_errno __tep_parse_format(struct tep_event_format **eventp, static enum tep_errno __parse_event(struct tep_handle *pevent, - struct tep_event_format **eventp, + struct tep_event **eventp, const char *buf, unsigned long size, const char *sys) { int ret = __tep_parse_format(eventp, pevent, buf, size, sys); - struct tep_event_format *event = *eventp; + struct tep_event *event = *eventp; if (event == NULL) return ret; @@ -6154,7 +6185,7 @@ __parse_event(struct tep_handle *pevent, return 0; event_add_failed: - tep_free_format(event); + tep_free_event(event); return ret; } @@ -6174,7 +6205,7 @@ event_add_failed: * /sys/kernel/debug/tracing/events/.../.../format */ enum tep_errno tep_parse_format(struct tep_handle *pevent, - struct tep_event_format **eventp, + struct tep_event **eventp, const char *buf, unsigned long size, const char *sys) { @@ -6198,7 +6229,7 @@ enum tep_errno tep_parse_format(struct tep_handle *pevent, enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, unsigned long size, const char *sys) { - struct tep_event_format *event = NULL; + struct tep_event *event = NULL; return __parse_event(pevent, &event, buf, size, sys); } @@ -6235,7 +6266,7 @@ int get_field_val(struct trace_seq *s, struct tep_format_field *field, * * On failure, it returns NULL. */ -void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event, +void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, int *len, int err) { @@ -6282,7 +6313,7 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event, * * Returns 0 on success -1 on field not found. */ -int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event, +int tep_get_field_val(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, unsigned long long *val, int err) { @@ -6307,7 +6338,7 @@ int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event, * * Returns 0 on success -1 on field not found. */ -int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event, +int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, unsigned long long *val, int err) { @@ -6332,7 +6363,7 @@ int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event * * Returns 0 on success -1 on field not found. */ -int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event, +int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, unsigned long long *val, int err) { @@ -6358,7 +6389,7 @@ int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event, * Returns: 0 on success, -1 field not found, or 1 if buffer is full. */ int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event_format *event, const char *name, + struct tep_event *event, const char *name, struct tep_record *record, int err) { struct tep_format_field *field = tep_find_field(event, name); @@ -6390,7 +6421,7 @@ int tep_print_num_field(struct trace_seq *s, const char *fmt, * Returns: 0 on success, -1 field not found, or 1 if buffer is full. */ int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event_format *event, const char *name, + struct tep_event *event, const char *name, struct tep_record *record, int err) { struct tep_format_field *field = tep_find_field(event, name); @@ -6550,11 +6581,11 @@ int tep_unregister_print_function(struct tep_handle *pevent, return -1; } -static struct tep_event_format *search_event(struct tep_handle *pevent, int id, - const char *sys_name, - const char *event_name) +static struct tep_event *search_event(struct tep_handle *pevent, int id, + const char *sys_name, + const char *event_name) { - struct tep_event_format *event; + struct tep_event *event; if (id >= 0) { /* search by id */ @@ -6589,12 +6620,18 @@ static struct tep_event_format *search_event(struct tep_handle *pevent, int id, * * If @id is >= 0, then it is used to find the event. * else @sys_name and @event_name are used. + * + * Returns: + * TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten + * TEP_REGISTER_SUCCESS if a new handler is registered successfully + * negative TEP_ERRNO_... in case of an error + * */ int tep_register_event_handler(struct tep_handle *pevent, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context) { - struct tep_event_format *event; + struct tep_event *event; struct event_handler *handle; event = search_event(pevent, id, sys_name, event_name); @@ -6606,7 +6643,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id, event->handler = func; event->context = context; - return 0; + return TEP_REGISTER_SUCCESS_OVERWRITE; not_found: /* Save for later use. */ @@ -6636,7 +6673,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id, pevent->handlers = handle; handle->context = context; - return -1; + return TEP_REGISTER_SUCCESS; } static int handle_matches(struct event_handler *handler, int id, @@ -6678,7 +6715,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context) { - struct tep_event_format *event; + struct tep_event *event; struct event_handler *handle; struct event_handler **next; @@ -6719,8 +6756,10 @@ struct tep_handle *tep_alloc(void) { struct tep_handle *pevent = calloc(1, sizeof(*pevent)); - if (pevent) + if (pevent) { pevent->ref_count = 1; + pevent->host_bigendian = tep_host_bigendian(); + } return pevent; } @@ -6730,6 +6769,13 @@ void tep_ref(struct tep_handle *pevent) pevent->ref_count++; } +int tep_get_ref(struct tep_handle *tep) +{ + if (tep) + return tep->ref_count; + return 0; +} + void tep_free_format_field(struct tep_format_field *field) { free(field->type); @@ -6756,7 +6802,7 @@ static void free_formats(struct tep_format *format) free_format_fields(format->fields); } -void tep_free_format(struct tep_event_format *event) +void tep_free_event(struct tep_event *event) { free(event->name); free(event->system); @@ -6842,7 +6888,7 @@ void tep_free(struct tep_handle *pevent) } for (i = 0; i < pevent->nr_events; i++) - tep_free_format(pevent->events[i]); + tep_free_event(pevent->events[i]); while (pevent->handlers) { handle = pevent->handlers; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 16bf4c890b6f..aec48f2aea8a 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -57,11 +57,11 @@ struct tep_record { /* ----------------------- tep ----------------------- */ struct tep_handle; -struct tep_event_format; +struct tep_event; typedef int (*tep_event_handler_func)(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, + struct tep_event *event, void *context); typedef int (*tep_plugin_load_func)(struct tep_handle *pevent); @@ -143,7 +143,7 @@ enum tep_format_flags { struct tep_format_field { struct tep_format_field *next; - struct tep_event_format *event; + struct tep_event *event; char *type; char *name; char *alias; @@ -277,7 +277,7 @@ struct tep_print_fmt { struct tep_print_arg *args; }; -struct tep_event_format { +struct tep_event { struct tep_handle *pevent; char *name; int id; @@ -409,20 +409,6 @@ void tep_print_plugins(struct trace_seq *s, typedef char *(tep_func_resolver_t)(void *priv, unsigned long long *addrp, char **modp); void tep_set_flag(struct tep_handle *tep, int flag); -unsigned short __tep_data2host2(struct tep_handle *pevent, unsigned short data); -unsigned int __tep_data2host4(struct tep_handle *pevent, unsigned int data); -unsigned long long -__tep_data2host8(struct tep_handle *pevent, unsigned long long data); - -#define tep_data2host2(pevent, ptr) __tep_data2host2(pevent, *(unsigned short *)(ptr)) -#define tep_data2host4(pevent, ptr) __tep_data2host4(pevent, *(unsigned int *)(ptr)) -#define tep_data2host8(pevent, ptr) \ -({ \ - unsigned long long __val; \ - \ - memcpy(&__val, (ptr), sizeof(unsigned long long)); \ - __tep_data2host8(pevent, __val); \ -}) static inline int tep_host_bigendian(void) { @@ -446,6 +432,7 @@ int tep_set_function_resolver(struct tep_handle *pevent, tep_func_resolver_t *func, void *priv); void tep_reset_function_resolver(struct tep_handle *pevent); int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid); +int tep_override_comm(struct tep_handle *pevent, const char *comm, int pid); int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock); int tep_register_function(struct tep_handle *pevent, char *name, unsigned long long addr, char *mod); @@ -454,14 +441,14 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt, int tep_pid_is_registered(struct tep_handle *pevent, int pid); void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, - struct tep_event_format *event, + struct tep_event *event, struct tep_record *record); void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, - struct tep_event_format *event, + struct tep_event *event, struct tep_record *record, bool use_trace_clock); void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, - struct tep_event_format *event, + struct tep_event *event, struct tep_record *record); void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, struct tep_record *record, bool use_trace_clock); @@ -472,34 +459,37 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, unsigned long size, const char *sys); enum tep_errno tep_parse_format(struct tep_handle *pevent, - struct tep_event_format **eventp, + struct tep_event **eventp, const char *buf, unsigned long size, const char *sys); -void tep_free_format(struct tep_event_format *event); -void tep_free_format_field(struct tep_format_field *field); -void *tep_get_field_raw(struct trace_seq *s, struct tep_event_format *event, +void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, int *len, int err); -int tep_get_field_val(struct trace_seq *s, struct tep_event_format *event, +int tep_get_field_val(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, unsigned long long *val, int err); -int tep_get_common_field_val(struct trace_seq *s, struct tep_event_format *event, +int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, unsigned long long *val, int err); -int tep_get_any_field_val(struct trace_seq *s, struct tep_event_format *event, +int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, const char *name, struct tep_record *record, unsigned long long *val, int err); int tep_print_num_field(struct trace_seq *s, const char *fmt, - struct tep_event_format *event, const char *name, + struct tep_event *event, const char *name, struct tep_record *record, int err); int tep_print_func_field(struct trace_seq *s, const char *fmt, - struct tep_event_format *event, const char *name, + struct tep_event *event, const char *name, struct tep_record *record, int err); +enum tep_reg_handler { + TEP_REGISTER_SUCCESS = 0, + TEP_REGISTER_SUCCESS_OVERWRITE, +}; + int tep_register_event_handler(struct tep_handle *pevent, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context); @@ -513,9 +503,9 @@ int tep_register_print_function(struct tep_handle *pevent, int tep_unregister_print_function(struct tep_handle *pevent, tep_func_handler func, char *name); -struct tep_format_field *tep_find_common_field(struct tep_event_format *event, const char *name); -struct tep_format_field *tep_find_field(struct tep_event_format *event, const char *name); -struct tep_format_field *tep_find_any_field(struct tep_event_format *event, const char *name); +struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name); +struct tep_format_field *tep_find_field(struct tep_event *event, const char *name); +struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name); const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr); unsigned long long @@ -524,40 +514,39 @@ unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, i int tep_read_number_field(struct tep_format_field *field, const void *data, unsigned long long *value); -struct tep_event_format *tep_get_first_event(struct tep_handle *tep); +struct tep_event *tep_get_first_event(struct tep_handle *tep); int tep_get_events_count(struct tep_handle *tep); -struct tep_event_format *tep_find_event(struct tep_handle *pevent, int id); +struct tep_event *tep_find_event(struct tep_handle *pevent, int id); -struct tep_event_format * +struct tep_event * tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name); -struct tep_event_format * +struct tep_event * tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record); void tep_data_lat_fmt(struct tep_handle *pevent, struct trace_seq *s, struct tep_record *record); int tep_data_type(struct tep_handle *pevent, struct tep_record *rec); -struct tep_event_format *tep_data_event_from_type(struct tep_handle *pevent, int type); int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec); int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec); int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec); const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid); -struct cmdline; -struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, - struct cmdline *next); -int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline); +struct tep_cmdline; +struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, + struct tep_cmdline *next); +int tep_cmdline_pid(struct tep_handle *pevent, struct tep_cmdline *cmdline); void tep_print_field(struct trace_seq *s, void *data, struct tep_format_field *field); void tep_print_fields(struct trace_seq *s, void *data, - int size __maybe_unused, struct tep_event_format *event); -void tep_event_info(struct trace_seq *s, struct tep_event_format *event, - struct tep_record *record); + int size __maybe_unused, struct tep_event *event); +void tep_event_info(struct trace_seq *s, struct tep_event *event, + struct tep_record *record); int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum, - char *buf, size_t buflen); + char *buf, size_t buflen); -struct tep_event_format **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type); -struct tep_format_field **tep_event_common_fields(struct tep_event_format *event); -struct tep_format_field **tep_event_fields(struct tep_event_format *event); +struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type); +struct tep_format_field **tep_event_common_fields(struct tep_event *event); +struct tep_format_field **tep_event_fields(struct tep_event *event); enum tep_endian { TEP_LITTLE_ENDIAN = 0, @@ -569,7 +558,7 @@ int tep_get_long_size(struct tep_handle *pevent); void tep_set_long_size(struct tep_handle *pevent, int long_size); int tep_get_page_size(struct tep_handle *pevent); void tep_set_page_size(struct tep_handle *pevent, int _page_size); -int tep_is_file_bigendian(struct tep_handle *pevent); +int tep_file_bigendian(struct tep_handle *pevent); void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian); int tep_is_host_bigendian(struct tep_handle *pevent); void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian); @@ -581,6 +570,7 @@ struct tep_handle *tep_alloc(void); void tep_free(struct tep_handle *pevent); void tep_ref(struct tep_handle *pevent); void tep_unref(struct tep_handle *pevent); +int tep_get_ref(struct tep_handle *tep); /* access to the internal parser */ void tep_buffer_init(const char *buf, unsigned long long size); @@ -712,7 +702,7 @@ struct tep_filter_arg { struct tep_filter_type { int event_id; - struct tep_event_format *event; + struct tep_event *event; struct tep_filter_arg *filter; }; diff --git a/tools/lib/traceevent/libtraceevent.pc.template b/tools/lib/traceevent/libtraceevent.pc.template new file mode 100644 index 000000000000..42e4d6cb6b9e --- /dev/null +++ b/tools/lib/traceevent/libtraceevent.pc.template @@ -0,0 +1,10 @@ +prefix=INSTALL_PREFIX +libdir=${prefix}/lib64 +includedir=${prefix}/include/traceevent + +Name: libtraceevent +URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git +Description: Linux kernel trace event library +Version: LIB_VERSION +Cflags: -I${includedir} +Libs: -L${libdir} -ltraceevent diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index ed87cb56713d..cb5ce66dab6e 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -27,7 +27,7 @@ static struct tep_format_field cpu = { struct event_list { struct event_list *next; - struct tep_event_format *event; + struct tep_event *event; }; static void show_error(char *error_buf, const char *fmt, ...) @@ -229,7 +229,7 @@ static void free_arg(struct tep_filter_arg *arg) } static int add_event(struct event_list **events, - struct tep_event_format *event) + struct tep_event *event) { struct event_list *list; @@ -243,7 +243,7 @@ static int add_event(struct event_list **events, return 0; } -static int event_match(struct tep_event_format *event, +static int event_match(struct tep_event *event, regex_t *sreg, regex_t *ereg) { if (sreg) { @@ -259,7 +259,7 @@ static enum tep_errno find_event(struct tep_handle *pevent, struct event_list **events, char *sys_name, char *event_name) { - struct tep_event_format *event; + struct tep_event *event; regex_t ereg; regex_t sreg; int match = 0; @@ -334,7 +334,7 @@ static void free_events(struct event_list *events) } static enum tep_errno -create_arg_item(struct tep_event_format *event, const char *token, +create_arg_item(struct tep_event *event, const char *token, enum tep_event_type type, struct tep_filter_arg **parg, char *error_str) { struct tep_format_field *field; @@ -940,7 +940,7 @@ static int collapse_tree(struct tep_filter_arg *arg, } static enum tep_errno -process_filter(struct tep_event_format *event, struct tep_filter_arg **parg, +process_filter(struct tep_event *event, struct tep_filter_arg **parg, char *error_str, int not) { enum tep_event_type type; @@ -1180,7 +1180,7 @@ process_filter(struct tep_event_format *event, struct tep_filter_arg **parg, } static enum tep_errno -process_event(struct tep_event_format *event, const char *filter_str, +process_event(struct tep_event *event, const char *filter_str, struct tep_filter_arg **parg, char *error_str) { int ret; @@ -1205,7 +1205,7 @@ process_event(struct tep_event_format *event, const char *filter_str, } static enum tep_errno -filter_event(struct tep_event_filter *filter, struct tep_event_format *event, +filter_event(struct tep_event_filter *filter, struct tep_event *event, const char *filter_str, char *error_str) { struct tep_filter_type *filter_type; @@ -1457,7 +1457,7 @@ static int copy_filter_type(struct tep_event_filter *filter, struct tep_filter_type *filter_type) { struct tep_filter_arg *arg; - struct tep_event_format *event; + struct tep_event *event; const char *sys; const char *name; char *str; @@ -1539,7 +1539,7 @@ int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *s { struct tep_handle *src_pevent; struct tep_handle *dest_pevent; - struct tep_event_format *event; + struct tep_event *event; struct tep_filter_type *filter_type; struct tep_filter_arg *arg; char *str; @@ -1683,11 +1683,11 @@ int tep_filter_event_has_trivial(struct tep_event_filter *filter, } } -static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg, +static int test_filter(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err); static const char * -get_comm(struct tep_event_format *event, struct tep_record *record) +get_comm(struct tep_event *event, struct tep_record *record) { const char *comm; int pid; @@ -1698,7 +1698,7 @@ get_comm(struct tep_event_format *event, struct tep_record *record) } static unsigned long long -get_value(struct tep_event_format *event, +get_value(struct tep_event *event, struct tep_format_field *field, struct tep_record *record) { unsigned long long val; @@ -1734,11 +1734,11 @@ get_value(struct tep_event_format *event, } static unsigned long long -get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg, +get_arg_value(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err); static unsigned long long -get_exp_value(struct tep_event_format *event, struct tep_filter_arg *arg, +get_exp_value(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err) { unsigned long long lval, rval; @@ -1793,7 +1793,7 @@ get_exp_value(struct tep_event_format *event, struct tep_filter_arg *arg, } static unsigned long long -get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg, +get_arg_value(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err) { switch (arg->type) { @@ -1817,7 +1817,7 @@ get_arg_value(struct tep_event_format *event, struct tep_filter_arg *arg, return 0; } -static int test_num(struct tep_event_format *event, struct tep_filter_arg *arg, +static int test_num(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err) { unsigned long long lval, rval; @@ -1860,7 +1860,7 @@ static int test_num(struct tep_event_format *event, struct tep_filter_arg *arg, static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record) { - struct tep_event_format *event; + struct tep_event *event; struct tep_handle *pevent; unsigned long long addr; const char *val = NULL; @@ -1908,7 +1908,7 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * return val; } -static int test_str(struct tep_event_format *event, struct tep_filter_arg *arg, +static int test_str(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err) { const char *val; @@ -1939,7 +1939,7 @@ static int test_str(struct tep_event_format *event, struct tep_filter_arg *arg, } } -static int test_op(struct tep_event_format *event, struct tep_filter_arg *arg, +static int test_op(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err) { switch (arg->op.type) { @@ -1961,7 +1961,7 @@ static int test_op(struct tep_event_format *event, struct tep_filter_arg *arg, } } -static int test_filter(struct tep_event_format *event, struct tep_filter_arg *arg, +static int test_filter(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err) { if (*err) { diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c index 528acc75d81a..a73eca34a8f9 100644 --- a/tools/lib/traceevent/plugin_function.c +++ b/tools/lib/traceevent/plugin_function.c @@ -124,7 +124,7 @@ static int add_and_get_index(const char *parent, const char *child, int cpu) } static int function_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { struct tep_handle *pevent = event->pevent; unsigned long long function; diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c index 9aa05b4ca811..5db5e401275f 100644 --- a/tools/lib/traceevent/plugin_hrtimer.c +++ b/tools/lib/traceevent/plugin_hrtimer.c @@ -27,7 +27,7 @@ static int timer_expire_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { trace_seq_printf(s, "hrtimer="); @@ -47,7 +47,7 @@ static int timer_expire_handler(struct trace_seq *s, static int timer_start_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { trace_seq_printf(s, "hrtimer="); diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c index 1beb4eaddfdf..0e3c601f9ed1 100644 --- a/tools/lib/traceevent/plugin_kmem.c +++ b/tools/lib/traceevent/plugin_kmem.c @@ -25,7 +25,7 @@ #include "trace-seq.h" static int call_site_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { struct tep_format_field *field; unsigned long long val, addr; diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c index d13c22846fa9..64b9c25a1fd3 100644 --- a/tools/lib/traceevent/plugin_kvm.c +++ b/tools/lib/traceevent/plugin_kvm.c @@ -249,7 +249,7 @@ static const char *find_exit_reason(unsigned isa, int val) } static int print_exit_reason(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, const char *field) + struct tep_event *event, const char *field) { unsigned long long isa; unsigned long long val; @@ -270,7 +270,7 @@ static int print_exit_reason(struct trace_seq *s, struct tep_record *record, } static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { unsigned long long info1 = 0, info2 = 0; @@ -293,7 +293,7 @@ static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record, static int kvm_emulate_insn_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { unsigned long long rip, csbase, len, flags, failed; int llen; @@ -332,7 +332,7 @@ static int kvm_emulate_insn_handler(struct trace_seq *s, static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { if (print_exit_reason(s, record, event, "exit_code") < 0) return -1; @@ -346,7 +346,7 @@ static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_reco } static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { tep_print_num_field(s, "rip %llx ", event, "rip", record, 1); @@ -372,7 +372,7 @@ union kvm_mmu_page_role { }; static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { unsigned long long val; static const char *access_str[] = { @@ -387,9 +387,9 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, /* * We can only use the structure if file is of the same - * endianess. + * endianness. */ - if (tep_is_file_bigendian(event->pevent) == + if (tep_file_bigendian(event->pevent) == tep_is_host_bigendian(event->pevent)) { trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s", @@ -419,7 +419,7 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, static int kvm_mmu_get_page_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { unsigned long long val; diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c index da3855e7b86f..e38b9477aad2 100644 --- a/tools/lib/traceevent/plugin_mac80211.c +++ b/tools/lib/traceevent/plugin_mac80211.c @@ -26,7 +26,7 @@ #define INDENT 65 -static void print_string(struct trace_seq *s, struct tep_event_format *event, +static void print_string(struct trace_seq *s, struct tep_event *event, const char *name, const void *data) { struct tep_format_field *f = tep_find_field(event, name); @@ -60,7 +60,7 @@ static void print_string(struct trace_seq *s, struct tep_event_format *event, static int drv_bss_info_changed(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { void *data = record->data; diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index 77882272672f..834c9e378ff8 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -67,7 +67,7 @@ static void write_and_save_comm(struct tep_format_field *field, static int sched_wakeup_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { struct tep_format_field *field; unsigned long long val; @@ -96,7 +96,7 @@ static int sched_wakeup_handler(struct trace_seq *s, static int sched_switch_handler(struct trace_seq *s, struct tep_record *record, - struct tep_event_format *event, void *context) + struct tep_event *event, void *context) { struct tep_format_field *field; unsigned long long val; diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index 8ff1d55954d1..8d5ecd2bf877 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -100,7 +100,8 @@ static void expand_buffer(struct trace_seq *s) * @fmt: printf format string * * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. + * space, the number of characters printed, or a negative + * value in case of an error. * * The tracer may use either sequence operations or its own * copy to user routines. To simplify formating of a trace @@ -129,9 +130,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) goto try_again; } - s->len += ret; + if (ret > 0) + s->len += ret; - return 1; + return ret; } /** @@ -139,6 +141,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) * @s: trace sequence descriptor * @fmt: printf format string * + * It returns 0 if the trace oversizes the buffer's free + * space, the number of characters printed, or a negative + * value in case of an error. + * * * The tracer may use either sequence operations or its own * copy to user routines. To simplify formating of a trace * trace_seq_printf is used to store strings into a special @@ -163,9 +169,10 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) goto try_again; } - s->len += ret; + if (ret > 0) + s->len += ret; - return len; + return ret; } /** |