diff options
Diffstat (limited to 'tools/testing/selftests')
653 files changed, 51131 insertions, 7778 deletions
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index 8059ce834247..61df01cdf0b2 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -2,3 +2,5 @@ gpiogpio-event-mon gpiogpio-hammer gpioinclude/ gpiolsgpio +tpm2/SpaceTest.log +tpm2/*.pyc diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 25b43a8c2b15..63430e2664c2 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 TARGETS = android +TARGETS += arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities TARGETS += cgroup +TARGETS += clone3 TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += drivers/dma-buf @@ -11,6 +13,7 @@ TARGETS += efivarfs TARGETS += exec TARGETS += filesystems TARGETS += filesystems/binderfs +TARGETS += filesystems/epoll TARGETS += firmware TARGETS += ftrace TARGETS += futex @@ -23,12 +26,14 @@ TARGETS += kexec TARGETS += kvm TARGETS += lib TARGETS += livepatch +TARGETS += lkdtm TARGETS += membarrier TARGETS += memfd TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += net/mptcp TARGETS += netfilter TARGETS += networking/timestamping TARGETS += nsfs @@ -37,6 +42,7 @@ TARGETS += powerpc TARGETS += proc TARGETS += pstore TARGETS += ptrace +TARGETS += openat2 TARGETS += rseq TARGETS += rtc TARGETS += seccomp @@ -47,6 +53,7 @@ TARGETS += splice TARGETS += static_keys TARGETS += sync TARGETS += sysctl +TARGETS += timens ifneq (1, $(quicktest)) TARGETS += timers endif @@ -63,6 +70,13 @@ TARGETS += zram TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug +# User can optionally provide a TARGETS skiplist. 
+SKIP_TARGETS ?= +ifneq ($(SKIP_TARGETS),) + TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) + override TARGETS := $(TMP) +endif + # Clear LDFLAGS and MAKEFLAGS if called from main # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules. @@ -126,66 +140,74 @@ endif # in the default INSTALL_HDR_PATH usr/include. khdr: ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install + $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install else - make --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \ + $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \ ARCH=$(ARCH) -C $(top_srcdir) headers_install endif all: khdr - @for TARGET in $(TARGETS); do \ - BUILD_TARGET=$$BUILD/$$TARGET; \ - mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ - done; + @ret=1; \ + for TARGET in $(TARGETS); do \ + BUILD_TARGET=$$BUILD/$$TARGET; \ + mkdir $$BUILD_TARGET -p; \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET; \ + ret=$$((ret * $$?)); \ + done; exit $$ret; run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ done; hotplug: @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_hotplug: hotplug @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ done; clean_hotplug: @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; run_pstore_crash: - make -C pstore run_crash + $(MAKE) -C pstore run_crash # Use $BUILD as the default install root. 
$BUILD points to the # right output location for the following cases: # 1. output_dir=kernel_src # 2. a separate output directory is specified using O= KBUILD_OUTPUT # 3. a separate output directory is specified using KBUILD_OUTPUT +# Avoid conflict with INSTALL_PATH set by the main Makefile # -INSTALL_PATH ?= $(BUILD)/install -INSTALL_PATH := $(abspath $(INSTALL_PATH)) +KSFT_INSTALL_PATH ?= $(BUILD)/kselftest_install +KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH)) +# Avoid changing the rest of the logic here and lib.mk. +INSTALL_PATH := $(KSFT_INSTALL_PATH) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh install: all ifdef INSTALL_PATH @# Ask all targets to install their files mkdir -p $(INSTALL_PATH)/kselftest + install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ - @for TARGET in $(TARGETS); do \ + @ret=1; \ + for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ - done; + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + ret=$$((ret * $$?)); \ + done; exit $$ret; @# Ask all targets to emit their test scripts echo "#!/bin/sh" > $(ALL_SCRIPT) @@ -198,12 +220,17 @@ ifdef INSTALL_PATH echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT) echo "fi" >> $(ALL_SCRIPT) + @# While building run_kselftest.sh skip also non-existent TARGET dirs: + @# they could be the result of a build failure and should NOT be + @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ + [ ! 
-d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ echo -n "run_many" >> $(ALL_SCRIPT); \ - make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + echo -n "Emit Tests for $$TARGET\n"; \ + $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "" >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ done; @@ -216,7 +243,7 @@ endif clean: @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; .PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile new file mode 100644 index 000000000000..93b567d23c8b --- /dev/null +++ b/tools/testing/selftests/arm64/Makefile @@ -0,0 +1,66 @@ +# SPDX-License-Identifier: GPL-2.0 + +# When ARCH not overridden for crosscompiling, lookup machine +ARCH ?= $(shell uname -m 2>/dev/null || echo not) + +ifneq (,$(filter $(ARCH),aarch64 arm64)) +ARM64_SUBTARGETS ?= tags signal +else +ARM64_SUBTARGETS := +endif + +CFLAGS := -Wall -O2 -g + +# A proper top_srcdir is needed by KSFT(lib.mk) +top_srcdir = $(realpath ../../../../) + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -I$(top_srcdir)/tools/testing/selftests/ + +# Guessing where the Kernel headers could have been installed +# depending on ENV config +ifeq ($(KBUILD_OUTPUT),) +khdr_dir = $(top_srcdir)/usr/include +else +# the KSFT preferred location when KBUILD_OUTPUT is set +khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include +endif + +CFLAGS += -I$(khdr_dir) + +export CFLAGS +export top_srcdir + +all: + @for DIR in $(ARM64_SUBTARGETS); do \ + 
BUILD_TARGET=$(OUTPUT)/$$DIR; \ + mkdir -p $$BUILD_TARGET; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +install: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +run_tests: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +# Avoid any output on non arm64 on emit_tests +emit_tests: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +clean: + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +.PHONY: all clean install run_tests emit_tests diff --git a/tools/testing/selftests/arm64/README b/tools/testing/selftests/arm64/README new file mode 100644 index 000000000000..a1badd882102 --- /dev/null +++ b/tools/testing/selftests/arm64/README @@ -0,0 +1,25 @@ +KSelfTest ARM64 +=============== + +- These tests are arm64 specific and so not built or run but just skipped + completely when env-variable ARCH is found to be different than 'arm64' + and `uname -m` reports other than 'aarch64'. 
+ +- Holding true the above, ARM64 KSFT tests can be run within the KSelfTest + framework using standard Linux top-level-makefile targets: + + $ make TARGETS=arm64 kselftest-clean + $ make TARGETS=arm64 kselftest + + or + + $ make -C tools/testing/selftests TARGETS=arm64 \ + INSTALL_PATH=<your-installation-path> install + + or, alternatively, only specific arm64/ subtargets can be picked: + + $ make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS="tags signal" \ + INSTALL_PATH=<your-installation-path> install + + Further details on building and running KSFT can be found in: + Documentation/dev-tools/kselftest.rst diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore new file mode 100644 index 000000000000..3c5b4e8ff894 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -0,0 +1,3 @@ +mangle_* +fake_sigreturn_* +!*.[ch] diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile new file mode 100644 index 000000000000..b497cfea4643 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 ARM Limited + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. + +SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(notdir $(PROGS)) + +# Get Kernel headers installed and use them. 
+KSFT_KHDR_INSTALL := 1 + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ + +clean: + $(CLEAN) + rm -f $(PROGS) + +# Common test-unit targets to build common-layout test-cases executables +# Needs secondary expansion to properly include the testcase c-file in pre-reqs +.SECONDEXPANSION: +$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h + $(CC) $(CFLAGS) $^ -o $@ diff --git a/tools/testing/selftests/arm64/signal/README b/tools/testing/selftests/arm64/signal/README new file mode 100644 index 000000000000..967a531b245c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/README @@ -0,0 +1,59 @@ +KSelfTest arm64/signal/ +======================= + +Signals Tests ++++++++++++++ + +- Tests are built around a common main compilation unit: such shared main + enforces a standard sequence of operations needed to perform a single + signal-test (setup/trigger/run/result/cleanup) + +- The above mentioned ops are configurable on a test-by-test basis: each test + is described (and configured) using the descriptor test_signals.h::struct tdescr + +- Each signal testcase is compiled into its own executable: a separate + executable is used for each test since many tests complete successfully + by receiving some kind of fatal signal from the Kernel, so it's safer + to run each test unit in its own standalone process, so as to start each + test from a clean slate. 
+ +- New tests can be simply defined in testcases/ dir providing a proper struct + tdescr overriding all the defaults we wish to change (as of now providing a + custom run method is mandatory though) + +- Signals' test-cases hereafter defined belong currently to two + principal families: + + - 'mangle_' tests: a real signal (SIGUSR1) is raised and used as a trigger + and then the test case code modifies the signal frame from inside the + signal handler itself. + + - 'fake_sigreturn_' tests: a brand new custom artificial sigframe structure + is placed on the stack and a sigreturn syscall is called to simulate a + real signal return. This kind of tests does not use a trigger usually and + they are just fired using some simple included assembly trampoline code. + + - Most of these tests are successfully passing if the process gets killed by + some fatal signal: usually SIGSEGV or SIGBUS. Since while writing this + kind of tests it is extremely easy in fact to end-up injecting other + unrelated SEGV bugs in the testcases, it becomes extremely tricky to + be really sure that the tests are really addressing what they are meant + to address and they are not instead falling apart due to unplanned bugs + in the test code. + In order to alleviate the misery of the life of such test-developer, a few + helpers are provided: + + - a couple of ASSERT_BAD/GOOD_CONTEXT() macros to easily parse a ucontext_t + and verify if it is indeed GOOD or BAD (depending on what we were + expecting), using the same logic/perspective as in the arm64 Kernel signals + routines. + + - a sanity mechanism to be used in 'fake_sigreturn_'-alike tests: enabled by + default it takes care to verify that the test-execution had at least + successfully progressed up to the stage of triggering the fake sigreturn + call. 
+ + In both cases test results are expected in terms of: + - some fatal signal sent by the Kernel to the test process + or + - analyzing some final regs state diff --git a/tools/testing/selftests/arm64/signal/signals.S b/tools/testing/selftests/arm64/signal/signals.S new file mode 100644 index 000000000000..9f8c1aefc3b9 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/signals.S @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#include <asm/unistd.h> + +.section .rodata, "a" +call_fmt: + .asciz "Calling sigreturn with fake sigframe sized:%zd at SP @%08lX\n" + +.text + +.globl fake_sigreturn + +/* fake_sigreturn x0:&sigframe, x1:sigframe_size, x2:misalign_bytes */ +fake_sigreturn: + stp x29, x30, [sp, #-16]! + mov x29, sp + + mov x20, x0 + mov x21, x1 + mov x22, x2 + + /* create space on the stack for fake sigframe 16 bytes-aligned */ + add x0, x21, x22 + add x0, x0, #15 + bic x0, x0, #15 /* round_up(sigframe_size + misalign_bytes, 16) */ + sub sp, sp, x0 + add x23, sp, x22 /* new sigframe base with misalignment if any */ + + ldr x0, =call_fmt + mov x1, x21 + mov x2, x23 + bl printf + + /* memcpy the provided content, while still keeping SP aligned */ + mov x0, x23 + mov x1, x20 + mov x2, x21 + bl memcpy + + /* + * Here saving a last minute SP to current->token acts as a marker: + * if we got here, we are successfully faking a sigreturn; in other + * words we are sure no bad fatal signal has been raised till now + * for unrelated reasons, so we should consider the possibly observed + * fatal signal like SEGV coming from Kernel restore_sigframe() and + * triggered as expected from our test-case. 
+ * For simplicity this assumes that current field 'token' is laid out + * as first in struct tdescr + */ + ldr x0, current + str x23, [x0] + /* finally move SP to misaligned address...if any requested */ + mov sp, x23 + + mov x8, #__NR_rt_sigreturn + svc #0 + + /* + * Above sigreturn should not return...looping here leads to a timeout + * and ensure proper and clean test failure, instead of jumping around + * on a potentially corrupted stack. + */ + b . diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c new file mode 100644 index 000000000000..416b1ff43199 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Generic test wrapper for arm64 signal tests. + * + * Each test provides its own tde struct tdescr descriptor to link with + * this wrapper. Framework provides common helpers. + */ +#include <kselftest.h> + +#include "test_signals.h" +#include "test_signals_utils.h" + +struct tdescr *current; + +int main(int argc, char *argv[]) +{ + current = &tde; + + ksft_print_msg("%s :: %s\n", current->name, current->descr); + if (test_setup(current) && test_init(current)) { + test_run(current); + test_cleanup(current); + } + test_result(current); + + return current->result; +} diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h new file mode 100644 index 000000000000..f96baf1cef1a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#ifndef __TEST_SIGNALS_H__ +#define __TEST_SIGNALS_H__ + +#include <signal.h> +#include <stdbool.h> +#include <ucontext.h> + +/* + * Using ARCH specific and sanitized Kernel headers installed by KSFT + * framework since we asked for it by setting flag KSFT_KHDR_INSTALL + * in 
our Makefile. + */ +#include <asm/ptrace.h> +#include <asm/hwcap.h> + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define get_regval(regname, out) \ +{ \ + asm volatile("mrs %0, " __stringify(regname) \ + : "=r" (out) \ + : \ + : "memory"); \ +} + +/* + * Feature flags used in tdescr.feats_required to specify + * any feature required by the test + */ +enum { + FSSBS_BIT, + FMAX_END +}; + +#define FEAT_SSBS (1UL << FSSBS_BIT) + +/* + * A descriptor used to describe and configure a test case. + * Fields with a non-trivial meaning are described inline in the following. + */ +struct tdescr { + /* KEEP THIS FIELD FIRST for easier lookup from assembly */ + void *token; + /* when disabled token based sanity checking is skipped in handler */ + bool sanity_disabled; + /* just a name for the test-case; mandatory field */ + char *name; + char *descr; + unsigned long feats_required; + /* bitmask of effectively supported feats: populated at run-time */ + unsigned long feats_supported; + bool initialized; + unsigned int minsigstksz; + /* signum used as a test trigger. Zero if no trigger-signal is used */ + int sig_trig; + /* + * signum considered as a successful test completion. + * Zero when no signal is expected on success + */ + int sig_ok; + /* signum expected on unsupported CPU features. 
*/ + int sig_unsupp; + /* a timeout in second for test completion */ + unsigned int timeout; + bool triggered; + bool pass; + unsigned int result; + /* optional sa_flags for the installed handler */ + int sa_flags; + ucontext_t saved_uc; + /* used by get_current_ctx() */ + size_t live_sz; + ucontext_t *live_uc; + volatile sig_atomic_t live_uc_valid; + /* optional test private data */ + void *priv; + + /* a custom setup: called alternatively to default_setup */ + int (*setup)(struct tdescr *td); + /* a custom init: called by default test init after test_setup */ + bool (*init)(struct tdescr *td); + /* a custom cleanup function called before test exits */ + void (*cleanup)(struct tdescr *td); + /* an optional function to be used as a trigger for starting test */ + int (*trigger)(struct tdescr *td); + /* + * the actual test-core: invoked differently depending on the + * presence of the trigger function above; this is mandatory + */ + int (*run)(struct tdescr *td, siginfo_t *si, ucontext_t *uc); + /* an optional function for custom results' processing */ + void (*check_result)(struct tdescr *td); +}; + +extern struct tdescr tde; +#endif diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c new file mode 100644 index 000000000000..2de6e5ed5e25 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 ARM Limited */ + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <sys/auxv.h> +#include <linux/auxvec.h> +#include <ucontext.h> + +#include <asm/unistd.h> + +#include <kselftest.h> + +#include "test_signals.h" +#include "test_signals_utils.h" +#include "testcases/testcases.h" + + +extern struct tdescr *current; + +static int sig_copyctx = SIGTRAP; + +static char const *const feats_names[FMAX_END] = { + " SSBS ", +}; 
+ +#define MAX_FEATS_SZ 128 +static char feats_string[MAX_FEATS_SZ]; + +static inline char *feats_to_string(unsigned long feats) +{ + size_t flen = MAX_FEATS_SZ - 1; + + for (int i = 0; i < FMAX_END; i++) { + if (feats & (1UL << i)) { + size_t tlen = strlen(feats_names[i]); + + assert(flen > tlen); + flen -= tlen; + strncat(feats_string, feats_names[i], flen); + } + } + + return feats_string; +} + +static void unblock_signal(int signum) +{ + sigset_t sset; + + sigemptyset(&sset); + sigaddset(&sset, signum); + sigprocmask(SIG_UNBLOCK, &sset, NULL); +} + +static void default_result(struct tdescr *td, bool force_exit) +{ + if (td->result == KSFT_SKIP) { + fprintf(stderr, "==>> completed. SKIP.\n"); + } else if (td->pass) { + fprintf(stderr, "==>> completed. PASS(1)\n"); + td->result = KSFT_PASS; + } else { + fprintf(stdout, "==>> completed. FAIL(0)\n"); + td->result = KSFT_FAIL; + } + + if (force_exit) + exit(td->result); +} + +/* + * The following handle_signal_* helpers are used by main default_handler + * and are meant to return true when signal is handled successfully: + * when false is returned instead, it means that the signal was somehow + * unexpected in that context and it was NOT handled; default_handler will + * take care of such unexpected situations. + */ + +static bool handle_signal_unsupported(struct tdescr *td, + siginfo_t *si, void *uc) +{ + if (feats_ok(td)) + return false; + + /* Mangling PC to avoid loops on original SIGILL */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + + if (!td->initialized) { + fprintf(stderr, + "Got SIG_UNSUPP @test_init. 
Ignore.\n"); + } else { + fprintf(stderr, + "-- RX SIG_UNSUPP on unsupported feat...OK\n"); + td->pass = 1; + default_result(current, 1); + } + + return true; +} + +static bool handle_signal_trigger(struct tdescr *td, + siginfo_t *si, void *uc) +{ + td->triggered = 1; + /* ->run was asserted NON-NULL in test_setup() already */ + td->run(td, si, uc); + + return true; +} + +static bool handle_signal_ok(struct tdescr *td, + siginfo_t *si, void *uc) +{ + /* + * it's a bug in the test code when this assert fail: + * if sig_trig was defined, it must have been used before getting here. + */ + assert(!td->sig_trig || td->triggered); + fprintf(stderr, + "SIG_OK -- SP:0x%llX si_addr@:%p si_code:%d token@:%p offset:%ld\n", + ((ucontext_t *)uc)->uc_mcontext.sp, + si->si_addr, si->si_code, td->token, td->token - si->si_addr); + /* + * fake_sigreturn tests, which have sanity_enabled=1, set, at the very + * last time, the token field to the SP address used to place the fake + * sigframe: so token==0 means we never made it to the end, + * segfaulting well-before, and the test is possibly broken. + */ + if (!td->sanity_disabled && !td->token) { + fprintf(stdout, + "current->token ZEROED...test is probably broken!\n"); + abort(); + } + /* + * Trying to narrow down the SEGV to the ones generated by Kernel itself + * via arm64_notify_segfault(). This is a best-effort check anyway, and + * the si_code check may need to change if this aspect of the kernel + * ABI changes. + */ + if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { + fprintf(stdout, + "si_code != SEGV_ACCERR...test is probably broken!\n"); + abort(); + } + td->pass = 1; + /* + * Some tests can lead to SEGV loops: in such a case we want to + * terminate immediately exiting straight away; some others are not + * supposed to outlive the signal handler code, due to the content of + * the fake sigframe which caused the signal itself. 
+ */ + default_result(current, 1); + + return true; +} + +static bool handle_signal_copyctx(struct tdescr *td, + siginfo_t *si, void *uc) +{ + /* Mangling PC to avoid loops on original BRK instr */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + memcpy(td->live_uc, uc, td->live_sz); + ASSERT_GOOD_CONTEXT(td->live_uc); + td->live_uc_valid = 1; + fprintf(stderr, + "GOOD CONTEXT grabbed from sig_copyctx handler\n"); + + return true; +} + +static void default_handler(int signum, siginfo_t *si, void *uc) +{ + if (current->sig_unsupp && signum == current->sig_unsupp && + handle_signal_unsupported(current, si, uc)) { + fprintf(stderr, "Handled SIG_UNSUPP\n"); + } else if (current->sig_trig && signum == current->sig_trig && + handle_signal_trigger(current, si, uc)) { + fprintf(stderr, "Handled SIG_TRIG\n"); + } else if (current->sig_ok && signum == current->sig_ok && + handle_signal_ok(current, si, uc)) { + fprintf(stderr, "Handled SIG_OK\n"); + } else if (signum == sig_copyctx && current->live_uc && + handle_signal_copyctx(current, si, uc)) { + fprintf(stderr, "Handled SIG_COPYCTX\n"); + } else { + if (signum == SIGALRM && current->timeout) { + fprintf(stderr, "-- Timeout !\n"); + } else { + fprintf(stderr, + "-- RX UNEXPECTED SIGNAL: %d\n", signum); + } + default_result(current, 1); + } +} + +static int default_setup(struct tdescr *td) +{ + struct sigaction sa; + + sa.sa_sigaction = default_handler; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + sa.sa_flags |= td->sa_flags; + sigemptyset(&sa.sa_mask); + /* uncatchable signals naturally skipped ... */ + for (int sig = 1; sig < 32; sig++) + sigaction(sig, &sa, NULL); + /* + * RT Signals default disposition is Term but they cannot be + * generated by the Kernel in response to our tests; so just catch + * them all and report them as UNEXPECTED signals. 
+ */ + for (int sig = SIGRTMIN; sig <= SIGRTMAX; sig++) + sigaction(sig, &sa, NULL); + + /* just in case...unblock explicitly all we need */ + if (td->sig_trig) + unblock_signal(td->sig_trig); + if (td->sig_ok) + unblock_signal(td->sig_ok); + if (td->sig_unsupp) + unblock_signal(td->sig_unsupp); + + if (td->timeout) { + unblock_signal(SIGALRM); + alarm(td->timeout); + } + fprintf(stderr, "Registered handlers for all signals.\n"); + + return 1; +} + +static inline int default_trigger(struct tdescr *td) +{ + return !raise(td->sig_trig); +} + +int test_init(struct tdescr *td) +{ + if (td->sig_trig == sig_copyctx) { + fprintf(stdout, + "Signal %d is RESERVED, cannot be used as a trigger. Aborting\n", + sig_copyctx); + return 0; + } + /* just in case */ + unblock_signal(sig_copyctx); + + td->minsigstksz = getauxval(AT_MINSIGSTKSZ); + if (!td->minsigstksz) + td->minsigstksz = MINSIGSTKSZ; + fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz); + + if (td->feats_required) { + td->feats_supported = 0; + /* + * Checking for CPU required features using both the + * auxval and the arm64 MRS Emulation to read sysregs. 
+ */ + if (getauxval(AT_HWCAP) & HWCAP_SSBS) + td->feats_supported |= FEAT_SSBS; + if (feats_ok(td)) + fprintf(stderr, + "Required Features: [%s] supported\n", + feats_to_string(td->feats_required & + td->feats_supported)); + else + fprintf(stderr, + "Required Features: [%s] NOT supported\n", + feats_to_string(td->feats_required & + ~td->feats_supported)); + } + + /* Perform test specific additional initialization */ + if (td->init && !td->init(td)) { + fprintf(stderr, "FAILED Testcase initialization.\n"); + return 0; + } + td->initialized = 1; + fprintf(stderr, "Testcase initialized.\n"); + + return 1; +} + +int test_setup(struct tdescr *td) +{ + /* assert core invariants symptom of a rotten testcase */ + assert(current); + assert(td); + assert(td->name); + assert(td->run); + + /* Default result is FAIL if test setup fails */ + td->result = KSFT_FAIL; + if (td->setup) + return td->setup(td); + else + return default_setup(td); +} + +int test_run(struct tdescr *td) +{ + if (td->sig_trig) { + if (td->trigger) + return td->trigger(td); + else + return default_trigger(td); + } else { + return td->run(td, NULL, NULL); + } +} + +void test_result(struct tdescr *td) +{ + if (td->initialized && td->result != KSFT_SKIP && td->check_result) + td->check_result(td); + default_result(td, 0); +} + +void test_cleanup(struct tdescr *td) +{ + if (td->cleanup) + td->cleanup(td); +} diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h new file mode 100644 index 000000000000..6772b5c8d274 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#ifndef __TEST_SIGNALS_UTILS_H__ +#define __TEST_SIGNALS_UTILS_H__ + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "test_signals.h" + +int test_init(struct tdescr *td); +int test_setup(struct tdescr *td); +void 
test_cleanup(struct tdescr *td); +int test_run(struct tdescr *td); +void test_result(struct tdescr *td); + +static inline bool feats_ok(struct tdescr *td) +{ + return (td->feats_required & td->feats_supported) == td->feats_required; +} + +/* + * Obtaining a valid and full-blown ucontext_t from userspace is tricky: + * libc getcontext() does not save all the regs and messes with some of + * them (pstate value in particular is not reliable). + * + * Here we use a service signal to grab the ucontext_t from inside a + * dedicated signal handler, since there, it is populated by Kernel + * itself in setup_sigframe(). The grabbed context is then stored and + * made available in td->live_uc. + * + * The service signal used is a SIGTRAP induced by a 'brk' instruction, + * because here we have to avoid syscalls to trigger the signal since + * they would cause any SVE sigframe content (if any) to be removed. + * + * Anyway this function really serves a dual purpose: + * + * 1. grab a valid sigcontext into td->live_uc for result analysis: in + * such case it returns 1. + * + * 2. detect if, somehow, a previously grabbed live_uc context has been + * used actively with a sigreturn: in such a case the execution would have + * magically resumed in the middle of this function itself (seen_already==1): + * in such a case return 0, since in fact we have not just simply grabbed + * the context. + * + * This latter case is useful to detect when a fake_sigreturn test-case has + * unexpectedly survived without hitting a SEGV. + * + * Note that the case of runtime dynamically sized sigframes (like in SVE + * context) is still NOT addressed: sigframe size is supposed to be fixed + * at sizeof(ucontext_t). 
+ */ +static __always_inline bool get_current_context(struct tdescr *td, + ucontext_t *dest_uc) +{ + static volatile bool seen_already; + + assert(td && dest_uc); + /* it's a genuine invocation..reinit */ + seen_already = 0; + td->live_uc_valid = 0; + td->live_sz = sizeof(*dest_uc); + memset(dest_uc, 0x00, td->live_sz); + td->live_uc = dest_uc; + /* + * Grab ucontext_t triggering a SIGTRAP. + * + * Note that: + * - live_uc_valid is declared volatile sig_atomic_t in + * struct tdescr since it will be changed inside the + * sig_copyctx handler + * - the additional 'memory' clobber is there to avoid possible + * compiler's assumption on live_uc_valid and the content + * pointed by dest_uc, which are all changed inside the signal + * handler + * - BRK causes a debug exception which is handled by the Kernel + * and finally causes the SIGTRAP signal to be delivered to this + * test thread. Since such delivery happens on the ret_to_user() + * /do_notify_resume() debug exception return-path, we are sure + * that the registered SIGTRAP handler has been run to completion + * before the execution path is restored here: as a consequence + * we can be sure that the volatile sig_atomic_t live_uc_valid + * carries a meaningful result. Being in a single thread context + * we'll also be sure that any access to memory modified by the + * handler (namely ucontext_t) will be visible once returned. + * - note that since we are using a breakpoint instruction here + * to cause a SIGTRAP, the ucontext_t grabbed from the signal + * handler would naturally contain a PC pointing exactly to this + * BRK line, which means that, on return from the signal handler, + * or if we place the ucontext_t on the stack to fake a sigreturn, + * we'll end up in an infinite loop of BRK-SIGTRAP-handler. + * For this reason we take care to artificially move forward the + * PC to the next instruction while inside the signal handler. 
+ */ + asm volatile ("brk #666" + : "+m" (*dest_uc) + : + : "memory"); + + /* + * If we get here with seen_already==1 it implies the td->live_uc + * context has been used to get back here....this probably means + * a test has failed to cause a SEGV...anyway live_uc does not + * point to a just acquired copy of ucontext_t...so return 0 + */ + if (seen_already) { + fprintf(stdout, + "Unexpected successful sigreturn detected: live_uc is stale !\n"); + return 0; + } + seen_already = 1; + + return td->live_uc_valid; +} + +int fake_sigreturn(void *sigframe, size_t sz, int misalign_bytes); +#endif diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c new file mode 100644 index 000000000000..8dc600a7d4fd --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a BAD Unknown magic + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include <signal.h> +#include <ucontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_bad_magic_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. 
*/ + head = get_starting_head(shead, HDR_SZ * 2, GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + /* + * use a well known NON existent bad magic...something + * we should pretty sure won't be ever defined in Kernel + */ + head->magic = KSFT_BAD_MAGIC; + head->size = HDR_SZ; + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_MAGIC", + .descr = "Trigger a sigreturn with a sigframe with a bad magic", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_magic_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c new file mode 100644 index 000000000000..b3c362100666 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a bad record overflowing + * the __reserved space: on sigreturn Kernel must spot this attempt and + * the test case is expected to be terminated via SEGV. 
+ */ + +#include <signal.h> +#include <ucontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +#define MIN_SZ_ALIGN 16 + +static int fake_sigreturn_bad_size_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, need_sz, offset; + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + /* at least HDR_SZ + bad sized esr_context needed */ + need_sz = sizeof(struct esr_context) + HDR_SZ; + head = get_starting_head(shead, need_sz, resv_sz, &offset); + if (!head) + return 0; + + /* + * Use an esr_context to build a fake header with a + * size greater than the free __reserved area minus HDR_SZ; + * using ESR_MAGIC here since it is not checked for size nor + * is limited to one instance. + * + * At first inject an additional normal esr_context + */ + head->magic = ESR_MAGIC; + head->size = sizeof(struct esr_context); + /* and terminate properly */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + ASSERT_GOOD_CONTEXT(&sf.uc); + + /* + * now mess with fake esr_context size: leaving less space than + * needed while keeping size value 16-aligned + * + * It must trigger a SEGV from Kernel on: + * + * resv_sz - offset < sizeof(*head) + */ + /* at first set the maximum good 16-aligned size */ + head->size = (resv_sz - offset - need_sz + MIN_SZ_ALIGN) & ~0xfUL; + /* plus a bit more of 16-aligned sized stuff */ + head->size += MIN_SZ_ALIGN; + /* and terminate properly */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_SIZE", + .descr = "Triggers a sigreturn with a overrun __reserved area", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_size_run, +}; diff --git
a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c new file mode 100644 index 000000000000..a44b88bfc81a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a badly sized terminator + * record: on sigreturn Kernel must spot this attempt and the test case + * is expected to be terminated via SEGV. + */ + +#include <signal.h> +#include <ucontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* at least HDR_SZ for the badly sized terminator. 
*/ + head = get_starting_head(shead, HDR_SZ, GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + head->magic = 0; + head->size = HDR_SZ; + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_SIZE_FOR_TERMINATOR", + .descr = "Trigger a sigreturn using non-zero size terminator", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_size_for_magic0_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c new file mode 100644 index 000000000000..afe8915f0998 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including an additional FPSIMD + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. 
+ */ + +#include <signal.h> +#include <ucontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ, + GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + /* Add a spurious fpsimd_context */ + head->magic = FPSIMD_MAGIC; + head->size = sizeof(struct fpsimd_context); + /* and terminate */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_DUPLICATED_FPSIMD", + .descr = "Triggers a sigreturn including two fpsimd_context", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_duplicated_fpsimd_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c new file mode 100644 index 000000000000..1e089e66f9f3 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack at a misaligned SP: on sigreturn + * Kernel must spot this attempt and the test case is expected to be + * terminated via SEGV. 
+ */ + +#include <signal.h> +#include <ucontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_misaligned_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* Forcing sigframe on misaligned SP (16 + 3) */ + fake_sigreturn(&sf, sizeof(sf), 3); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_MISALIGNED_SP", + .descr = "Triggers a sigreturn with a misaligned sigframe", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_misaligned_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c new file mode 100644 index 000000000000..08ecd8073a1a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack missing the mandatory FPSIMD + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include <stdio.h> +#include <signal.h> +#include <ucontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, FPSIMD_MAGIC, resv_sz, &offset); + if (head && resv_sz - offset >= HDR_SZ) { + fprintf(stderr, "Mangling template header. 
Spare space:%zd\n", + resv_sz - offset); + /* Just overwrite fpsimd_context */ + write_terminator_record(head); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + } + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_MISSING_FPSIMD", + .descr = "Triggers a sigreturn with a missing fpsimd_context", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_missing_fpsimd_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c new file mode 100644 index 000000000000..2cb118b0ba05 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the execution state bit: this attempt must be spotted by Kernel and + * the test case is expected to be terminated via SEGV.
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* This config should trigger a SIGSEGV by Kernel */ + uc->uc_mcontext.pstate ^= PSR_MODE32_BIT; + + return 1; +} + +struct tdescr tde = { + .sanity_disabled = true, + .name = "MANGLE_PSTATE_INVALID_STATE_TOGGLE", + .descr = "Mangling uc_mcontext with INVALID STATE_TOGGLE", + .sig_trig = SIGUSR1, + .sig_ok = SIGSEGV, + .run = mangle_invalid_pstate_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c new file mode 100644 index 000000000000..434b82597007 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, mangling the + * DAIF bits in an illegal manner: this attempt must be spotted by Kernel + * and the test case is expected to be terminated via SEGV. + * + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* + * This config should trigger a SIGSEGV by Kernel when it checks + * the sigframe consistency in valid_user_regs() routine. 
+ */ + uc->uc_mcontext.pstate |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT; + + return 1; +} + +struct tdescr tde = { + .sanity_disabled = true, + .name = "MANGLE_PSTATE_INVALID_DAIF_BITS", + .descr = "Mangling uc_mcontext with INVALID DAIF_BITS", + .sig_trig = SIGUSR1, + .sig_ok = SIGSEGV, + .run = mangle_invalid_pstate_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c new file mode 100644 index 000000000000..95f821abdf46 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c new file mode 100644 index 000000000000..cc222d8a618a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV.
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c new file mode 100644 index 000000000000..2188add7d28c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c new file mode 100644 index 000000000000..df32dd5a479c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV.
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c new file mode 100644 index 000000000000..9e6829b7e5db --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c new file mode 100644 index 000000000000..5685a4f10d06 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be terminated via SEGV.
+ */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h new file mode 100644 index 000000000000..f5bf1804d858 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 ARM Limited + * + * Utility macro to ease definition of testcases toggling mode EL + */ + +#define DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(_mode) \ + \ +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, \ + ucontext_t *uc) \ +{ \ + ASSERT_GOOD_CONTEXT(uc); \ + \ + uc->uc_mcontext.pstate &= ~PSR_MODE_MASK; \ + uc->uc_mcontext.pstate |= PSR_MODE_EL ## _mode; \ + \ + return 1; \ +} \ + \ +struct tdescr tde = { \ + .sanity_disabled = true, \ + .name = "MANGLE_PSTATE_INVALID_MODE_EL"#_mode, \ + .descr = "Mangling uc_mcontext INVALID MODE EL"#_mode, \ + .sig_trig = SIGUSR1, \ + .sig_ok = SIGSEGV, \ + .run = mangle_invalid_pstate_run, \ +} diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c new file mode 100644 index 000000000000..61ebcdf63831 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 ARM Limited */ +#include "testcases.h" + +struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *found = NULL; + + if (!head || resv_sz < HDR_SZ) + return found; + + while (offs <= resv_sz - HDR_SZ && + head->magic != magic && head->magic) { + offs += head->size; + head = 
GET_RESV_NEXT_HEAD(head); + } + if (head->magic == magic) { + found = head; + if (offset) + *offset = offs; + } + + return found; +} + +bool validate_extra_context(struct extra_context *extra, char **err) +{ + struct _aarch64_ctx *term; + + if (!extra || !err) + return false; + + fprintf(stderr, "Validating EXTRA...\n"); + term = GET_RESV_NEXT_HEAD(extra); + if (!term || term->magic || term->size) { + *err = "Missing terminator after EXTRA context"; + return false; + } + if (extra->datap & 0x0fUL) + *err = "Extra DATAP misaligned"; + else if (extra->size & 0x0fUL) + *err = "Extra SIZE misaligned"; + else if (extra->datap != (uint64_t)term + sizeof(*term)) + *err = "Extra DATAP misplaced (not contiguous)"; + if (*err) + return false; + + return true; +} + +bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) +{ + bool terminated = false; + size_t offs = 0; + int flags = 0; + struct extra_context *extra = NULL; + struct _aarch64_ctx *head = + (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; + + if (!err) + return false; + /* Walk till the end terminator verifying __reserved contents */ + while (head && !terminated && offs < resv_sz) { + if ((uint64_t)head & 0x0fUL) { + *err = "Misaligned HEAD"; + return false; + } + + switch (head->magic) { + case 0: + if (head->size) + *err = "Bad size for terminator"; + else + terminated = true; + break; + case FPSIMD_MAGIC: + if (flags & FPSIMD_CTX) + *err = "Multiple FPSIMD_MAGIC"; + else if (head->size != + sizeof(struct fpsimd_context)) + *err = "Bad size for fpsimd_context"; + flags |= FPSIMD_CTX; + break; + case ESR_MAGIC: + if (head->size != sizeof(struct esr_context)) + *err = "Bad size for esr_context"; + break; + case SVE_MAGIC: + if (flags & SVE_CTX) + *err = "Multiple SVE_MAGIC"; + else if (head->size != + sizeof(struct sve_context)) + *err = "Bad size for sve_context"; + flags |= SVE_CTX; + break; + case EXTRA_MAGIC: + if (flags & EXTRA_CTX) + *err = "Multiple EXTRA_MAGIC"; + else if (head->size != + 
sizeof(struct extra_context)) + *err = "Bad size for extra_context"; + flags |= EXTRA_CTX; + extra = (struct extra_context *)head; + break; + case KSFT_BAD_MAGIC: + /* + * This is a BAD magic header defined + * artificially by a testcase and surely + * unknown to the Kernel parse_user_sigframe(). + * It MUST cause a Kernel induced SEGV + */ + *err = "BAD MAGIC !"; + break; + default: + /* + * A still unknown Magic: potentially freshly added + * to the Kernel code and still unknown to the + * tests. + */ + fprintf(stdout, + "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n", + head->magic); + break; + } + + if (*err) + return false; + + offs += head->size; + if (resv_sz < offs + sizeof(*head)) { + *err = "HEAD Overrun"; + return false; + } + + if (flags & EXTRA_CTX) + if (!validate_extra_context(extra, err)) + return false; + + head = GET_RESV_NEXT_HEAD(head); + } + + if (terminated && !(flags & FPSIMD_CTX)) { + *err = "Missing FPSIMD"; + return false; + } + + return true; +} + +/* + * This function walks through the records inside the provided reserved area + * trying to find enough space to fit @need_sz bytes: if not enough space is + * available and an extra_context record is present, it throws away the + * extra_context record. + * + * It returns a pointer to a new header where it is possible to start storing + * our need_sz bytes. + * + * @shead: points to the start of reserved area + * @need_sz: needed bytes + * @resv_sz: reserved area size in bytes + * @offset: if not null, this will be filled with the offset of the return + * head pointer from @shead + * + * @return: pointer to a new head where to start storing need_sz bytes, or + * NULL if space could not be made available. 
+ */ +struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead, + size_t need_sz, size_t resv_sz, + size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *head; + + head = get_terminator(shead, resv_sz, &offs); + /* not found a terminator...no need to update offset if any */ + if (!head) + return head; + if (resv_sz - offs < need_sz) { + fprintf(stderr, "Low on space:%zd. Discarding extra_context.\n", + resv_sz - offs); + head = get_header(shead, EXTRA_MAGIC, resv_sz, &offs); + if (!head || resv_sz - offs < need_sz) { + fprintf(stderr, + "Failed to reclaim space on sigframe.\n"); + return NULL; + } + } + + fprintf(stderr, "Available space:%zd\n", resv_sz - offs); + if (offset) + *offset = offs; + return head; +} diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h new file mode 100644 index 000000000000..ad884c135314 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ +#ifndef __TESTCASES_H__ +#define __TESTCASES_H__ + +#include <stddef.h> +#include <stdio.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdlib.h> +#include <ucontext.h> +#include <signal.h> + +/* Architecture specific sigframe definitions */ +#include <asm/sigcontext.h> + +#define FPSIMD_CTX (1 << 0) +#define SVE_CTX (1 << 1) +#define EXTRA_CTX (1 << 2) + +#define KSFT_BAD_MAGIC 0xdeadbeef + +#define HDR_SZ \ + sizeof(struct _aarch64_ctx) + +#define GET_SF_RESV_HEAD(sf) \ + (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved) + +#define GET_SF_RESV_SIZE(sf) \ + sizeof((sf).uc.uc_mcontext.__reserved) + +#define GET_UCP_RESV_SIZE(ucp) \ + sizeof((ucp)->uc_mcontext.__reserved) + +#define ASSERT_BAD_CONTEXT(uc) do { \ + char *err = NULL; \ + if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \ + if (err) \ + fprintf(stderr, \ + "Using badly built context - ERR: 
%s\n",\ + err); \ + } else { \ + abort(); \ + } \ +} while (0) + +#define ASSERT_GOOD_CONTEXT(uc) do { \ + char *err = NULL; \ + if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \ + if (err) \ + fprintf(stderr, \ + "Detected BAD context - ERR: %s\n", err);\ + abort(); \ + } else { \ + fprintf(stderr, "uc context validated.\n"); \ + } \ +} while (0) + +/* + * A simple record-walker for __reserved area: it walks through assuming + * only to find a proper struct __aarch64_ctx header descriptor. + * + * Instead it makes no assumptions on the content and ordering of the + * records, any needed bounds checking must be enforced by the caller + * if wanted: this way can be used by caller on any maliciously built bad + * contexts. + * + * head->size accounts both for payload and header _aarch64_ctx size ! + */ +#define GET_RESV_NEXT_HEAD(h) \ + (struct _aarch64_ctx *)((char *)(h) + (h)->size) + +struct fake_sigframe { + siginfo_t info; + ucontext_t uc; +}; + + +bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); + +bool validate_extra_context(struct extra_context *extra, char **err); + +struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset); + +static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head, + size_t resv_sz, + size_t *offset) +{ + return get_header(head, 0, resv_sz, offset); +} + +static inline void write_terminator_record(struct _aarch64_ctx *tail) +{ + if (tail) { + tail->magic = 0; + tail->size = 0; + } +} + +struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead, + size_t need_sz, size_t resv_sz, + size_t *offset); +#endif diff --git a/tools/testing/selftests/arm64/tags/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore new file mode 100644 index 000000000000..e8fae8d61ed6 --- /dev/null +++ b/tools/testing/selftests/arm64/tags/.gitignore @@ -0,0 +1 @@ +tags_test diff --git a/tools/testing/selftests/arm64/tags/Makefile 
b/tools/testing/selftests/arm64/tags/Makefile new file mode 100644 index 000000000000..41cb75070511 --- /dev/null +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := tags_test +TEST_PROGS := run_tags_test.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh new file mode 100755 index 000000000000..745f11379930 --- /dev/null +++ b/tools/testing/selftests/arm64/tags/run_tags_test.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +echo "--------------------" +echo "running tags test" +echo "--------------------" +./tags_test +if [ $? -ne 0 ]; then + echo "[FAIL]" +else + echo "[PASS]" +fi diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c new file mode 100644 index 000000000000..5701163460ef --- /dev/null +++ b/tools/testing/selftests/arm64/tags/tags_test.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/prctl.h> +#include <sys/utsname.h> + +#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56) +#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \ + SHIFT_TAG(tag)) + +int main(void) +{ + static int tbi_enabled = 0; + unsigned long tag = 0; + struct utsname *ptr; + int err; + + if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0) + tbi_enabled = 1; + ptr = (struct utsname *)malloc(sizeof(*ptr)); + if (tbi_enabled) + tag = 0x42; + ptr = (struct utsname *)SET_TAG(ptr, tag); + err = uname(ptr); + free(ptr); + + return err; +} diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 90f70d2c7c22..ec464859c6b6 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -7,11 +7,10 @@ 
FEATURE-DUMP.libbpf fixdep test_align test_dev_cgroup -test_progs +/test_progs* test_tcpbpf_user test_verifier_log feature -test_libbpf_open test_sock test_sock_addr test_sock_fields @@ -22,24 +21,20 @@ test_lirc_mode2_user get_cgroup_id_user test_skb_cgroup_id_user test_socket_cookie -test_cgroup_attach test_cgroup_storage -test_select_reuseport test_flow_dissector flow_dissector_load test_netcnt -test_section_names test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user test_sysctl -alu32 -libbpf.pc -libbpf.so.* test_hashmap test_btf_dump xdping -test_sockopt -test_sockopt_sk -test_sockopt_multi -test_tcp_rtt +test_cpp +*.skel.h +/no_alu32 +/bpf_gcc +/tools + diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c085964e1d05..257a1aaaa37d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -2,10 +2,14 @@ include ../../../../scripts/Kbuild.include include ../../../scripts/Makefile.arch -LIBDIR := ../../../lib +CURDIR := $(abspath .) +TOOLSDIR := $(abspath ../../..) 
+LIBDIR := $(TOOLSDIR)/lib BPFDIR := $(LIBDIR)/bpf -APIDIR := ../../../include/uapi -GENDIR := ../../../../include/generated +TOOLSINCDIR := $(TOOLSDIR)/include +BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool +APIDIR := $(TOOLSINCDIR)/uapi +GENDIR := $(abspath ../../../../include/generated) GENHDR := $(GENDIR)/autoconf.h ifneq ($(wildcard $(GENHDR)),) @@ -15,38 +19,32 @@ endif CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy -LLVM_READELF ?= llvm-readelf -BTF_PAHOLE ?= pahole -CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \ - -Dbpf_prog_load=bpf_prog_test_load \ +BPF_GCC ?= $(shell command -v bpf-gcc;) +CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ + -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ + -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program -LDLIBS += -lcap -lelf -lrt -lpthread +LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ - test_cgroup_storage test_select_reuseport test_section_names \ + test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_btf_dump test_cgroup_attach xdping test_sockopt test_sockopt_sk \ - test_sockopt_multi test_tcp_rtt - -BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) -TEST_GEN_FILES = $(BPF_OBJ_FILES) - -# Also test sub-register code-gen if LLVM has eBPF v3 processor support which -# contains both ALU32 and JMP32 instructions. 
-SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ - $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \ - $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \ - grep 'if w') -ifneq ($(SUBREG_CODEGEN),) -TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES)) + test_progs-no_alu32 + +# Also test bpf-gcc, if present +ifneq ($(BPF_GCC),) +TEST_GEN_PROGS += test_progs-bpf_gcc endif +TEST_GEN_FILES = +TEST_FILES = test_lwt_ip_encap.o \ + test_tc_edt.o + # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ - test_libbpf.sh \ test_xdp_redirect.sh \ test_xdp_meta.sh \ test_xdp_veth.sh \ @@ -63,35 +61,83 @@ TEST_PROGS := test_kmod.sh \ test_tcp_check_syncookie.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ - test_xdping.sh + test_xdping.sh \ + test_bpftool_build.sh TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ tcp_client.py \ - tcp_server.py + tcp_server.py \ + test_xdp_vlan.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ +TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user - -include ../lib.mk - -# NOTE: $(OUTPUT) won't get default value if used before lib.mk -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read -all: $(TEST_CUSTOM_PROGS) - -$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c - $(CC) -o $@ $< -Wl,--build-id - -$(OUTPUT)/test_stub.o: test_stub.c - $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $< + test_lirc_mode2_user xdping test_cpp runqslower + +TEST_CUSTOM_PROGS = urandom_read + +# Emit succinct information message describing current building step +# $1 - generic step name (e.g., CC, LINK, etc); +# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor]; +# $3 - target (assumed to be file); only file name will be emitted; +# $4 - optional extra arg, emitted as-is, if provided. 
+ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; +MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 +endif -BPFOBJ := $(OUTPUT)/libbpf.a +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 +override define CLEAN + $(call msg,CLEAN) + $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) +endef -$(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ) +include ../lib.mk -$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(OUTPUT)/libbpf.a +SCRATCH_DIR := $(OUTPUT)/tools +BUILD_DIR := $(SCRATCH_DIR)/build +INCLUDE_DIR := $(SCRATCH_DIR)/include +BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a + +# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# to build individual tests. +# NOTE: Semicolon at the end is critical to override lib.mk's default static +# rule for binaries. +$(notdir $(TEST_GEN_PROGS) \ + $(TEST_PROGS) \ + $(TEST_PROGS_EXTENDED) \ + $(TEST_GEN_PROGS_EXTENDED) \ + $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; + +$(OUTPUT)/%:%.c + $(call msg,BINARY,,$@) + $(LINK.c) $^ $(LDLIBS) -o $@ + +$(OUTPUT)/urandom_read: urandom_read.c + $(call msg,BINARY,,$@) + $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id + +$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) + $(call msg,CC,,$@) + $(CC) -c $(CFLAGS) -o $@ $< + +VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) +$(OUTPUT)/runqslower: $(BPFOBJ) + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ + BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) + +$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c @@ -101,35 +147,29 @@ $(OUTPUT)/test_socket_cookie: 
cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c -$(OUTPUT)/test_progs: trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c -$(OUTPUT)/test_sockopt: cgroup_helpers.c -$(OUTPUT)/test_sockopt_sk: cgroup_helpers.c -$(OUTPUT)/test_sockopt_multi: cgroup_helpers.c -$(OUTPUT)/test_tcp_rtt: cgroup_helpers.c -.PHONY: force - -# force a rebuild of BPFOBJ when its dependencies are updated -force: - -$(BPFOBJ): force - $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ - -PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) - -# Let newer LLVM versions transparently probe the kernel for availability -# of full BPF instruction set. -ifeq ($(PROBE),) - CPU ?= probe -else - CPU ?= generic -endif +DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ + $(BPFOBJ) | $(BUILD_DIR)/bpftool + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ + OUTPUT=$(BUILD_DIR)/bpftool/ \ + prefix= DESTDIR=$(SCRATCH_DIR)/ install + +$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ + ../../../include/uapi/linux/bpf.h \ + | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ + DESTDIR=$(SCRATCH_DIR) prefix= all install_headers + +$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): + $(call msg,MKDIR,,$@) + mkdir -p $@ # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. 
This fixes "missing" files on some architectures/distros, @@ -137,157 +177,222 @@ endif # # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. -CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \ +define get_sys_includes +$(shell $(1) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') +endef -CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \ - $(CLANG_SYS_INCLUDES) \ - -Wno-compare-distinct-pointer-types \ - -D__TARGET_ARCH_$(SRCARCH) +# Determine target endianness. +IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ + grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') +MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) -$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline -$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) +BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ + -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) -$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h -$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h +CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ + -Wno-compare-distinct-pointer-types -$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h -$(OUTPUT)/test_progs.o: flow_dissector_load.h - -BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris) -BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF) -BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm') -BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ - $(CLANG) -target bpf -O2 -g -c -x c - -o ./llvm_btf_verify.o; \ - $(LLVM_READELF) -S ./llvm_btf_verify.o | grep BTF; \ - /bin/rm -f ./llvm_btf_verify.o) - -ifneq ($(BTF_LLVM_PROBE),) - CLANG_FLAGS += -g -else -ifneq ($(BTF_LLC_PROBE),) -ifneq ($(BTF_PAHOLE_PROBE),) -ifneq 
($(BTF_OBJCOPY_PROBE),) - CLANG_FLAGS += -g - LLC_FLAGS += -mattr=dwarfris - DWARF2BTF = y -endif -endif -endif -endif - -TEST_PROGS_CFLAGS := -I. -I$(OUTPUT) -TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) -TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier +$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline +$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline -ifneq ($(SUBREG_CODEGEN),) -ALU32_BUILD_DIR = $(OUTPUT)/alu32 -TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32 -$(ALU32_BUILD_DIR): - mkdir -p $@ +$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h -$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(ALU32_BUILD_DIR) - cp $< $@ - -$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ - $(ALU32_BUILD_DIR)/urandom_read \ - | $(ALU32_BUILD_DIR) - $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \ - -o $(ALU32_BUILD_DIR)/test_progs_32 \ - test_progs.c test_stub.c trace_helpers.c prog_tests/*.c \ - $(OUTPUT)/libbpf.a $(LDLIBS) - -$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H) -$(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c - -$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \ - | $(ALU32_BUILD_DIR) - ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ - echo "clang failed") | \ - $(LLC) -march=bpf -mattr=+alu32 -mcpu=$(CPU) $(LLC_FLAGS) \ - -filetype=obj -o $@ -ifeq ($(DWARF2BTF),y) - $(BTF_PAHOLE) -J $@ -endif +# Build BPF object using Clang +# $1 - input .c file +# $2 - output .o file +# $3 - CFLAGS +# $4 - LDFLAGS +define CLANG_BPF_BUILD_RULE + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) + ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + -c $1 -o - || echo "BPF obj compilation failed") | \ + $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 +endef +# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 +define CLANG_NOALU32_BPF_BUILD_RULE + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) + ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + -c $1 -o - || echo "BPF obj compilation failed") | \ + 
$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 +endef +# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC +define CLANG_NATIVE_BPF_BUILD_RULE + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + ($(CLANG) $3 -O2 -emit-llvm \ + -c $1 -o - || echo "BPF obj compilation failed") | \ + $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 +endef +# Build BPF object using GCC +define GCC_BPF_BUILD_RULE + $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) + $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 +endef + +SKEL_BLACKLIST := btf__% test_pinning_invalid.c + +# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on +# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. +# Parameters: +# $1 - test runner base binary name (e.g., test_progs) +# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc) +define DEFINE_TEST_RUNNER + +TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 +TRUNNER_BINARY := $1$(if $2,-)$2 +TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \ + $$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c))) +TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ + $$(filter %.c,$(TRUNNER_EXTRA_SOURCES))) +TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES)) +TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h +TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)) +TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)) +TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ + $$(filter-out $(SKEL_BLACKLIST), \ + $$(TRUNNER_BPF_SRCS))) + +# Evaluate rules now with extra TRUNNER_XXX variables above already defined +$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2)) + +endef + +# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and +# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with: +# $1 - test runner base binary name (e.g., test_progs) +# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc) +define 
DEFINE_TEST_RUNNER_RULES + +ifeq ($($(TRUNNER_OUTPUT)-dir),) +$(TRUNNER_OUTPUT)-dir := y +$(TRUNNER_OUTPUT): + $$(call msg,MKDIR,,$$@) + mkdir -p $$@ endif -# Have one program compiled without "-target bpf" to test whether libbpf loads -# it successfully -$(OUTPUT)/test_xdp.o: progs/test_xdp.c - ($(CLANG) $(CLANG_FLAGS) -O2 -emit-llvm -c $< -o - || \ - echo "clang failed") | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ -ifeq ($(DWARF2BTF),y) - $(BTF_PAHOLE) -J $@ +# ensure we set up BPF objects generation rule just once for a given +# input/output directory combination +ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),) +$(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y +$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ + $(TRUNNER_BPF_PROGS_DIR)/%.c \ + $(TRUNNER_BPF_PROGS_DIR)/*.h \ + $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ + $(TRUNNER_BPF_CFLAGS), \ + $(TRUNNER_BPF_LDFLAGS)) + +$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ + $(TRUNNER_OUTPUT)/%.o \ + | $(BPFTOOL) $(TRUNNER_OUTPUT) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) + $$(BPFTOOL) gen skeleton $$< > $$@ endif -$(OUTPUT)/%.o: progs/%.c - ($(CLANG) $(CLANG_FLAGS) -O2 -target bpf -emit-llvm -c $< -o - || \ - echo "clang failed") | \ - $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ -ifeq ($(DWARF2BTF),y) - $(BTF_PAHOLE) -J $@ +# ensure we set up tests.h header generation rule just once +ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) +$(TRUNNER_TESTS_DIR)-tests-hdr := y +$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c + $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) + $$(shell ( cd $(TRUNNER_TESTS_DIR); \ + echo '/* Generated header, do not edit */'; \ + ls *.c 2> /dev/null | \ + sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ + ) > $$@) endif -PROG_TESTS_DIR = $(OUTPUT)/prog_tests -$(PROG_TESTS_DIR): - mkdir -p $@ -PROG_TESTS_H := $(PROG_TESTS_DIR)/tests.h -PROG_TESTS_FILES := $(wildcard prog_tests/*.c) -test_progs.c: $(PROG_TESTS_H) 
-$(OUTPUT)/test_progs: CFLAGS += $(TEST_PROGS_CFLAGS) -$(OUTPUT)/test_progs: test_progs.c $(PROG_TESTS_H) $(PROG_TESTS_FILES) -$(PROG_TESTS_H): $(PROG_TESTS_FILES) | $(PROG_TESTS_DIR) - $(shell ( cd prog_tests/; \ - echo '/* Generated header, do not edit */'; \ - echo '#ifdef DECLARE'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \ - echo '#endif'; \ - echo '#ifdef CALL'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@test_\1();@'; \ - echo '#endif' \ - ) > $(PROG_TESTS_H)) +# compile individual test files +# Note: we cd into output directory to ensure embedded BPF object is found +$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ + $(TRUNNER_TESTS_DIR)/%.c \ + $(TRUNNER_EXTRA_HDRS) \ + $(TRUNNER_BPF_OBJS) \ + $(TRUNNER_BPF_SKELS) \ + $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) + cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) + +$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ + %.c \ + $(TRUNNER_EXTRA_HDRS) \ + $(TRUNNER_TESTS_HDR) \ + $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) + $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ + +# only copy extra resources if in flavored build +$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) +ifneq ($2,) + $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) + cp -a $$^ $(TRUNNER_OUTPUT)/ +endif -MAP_TESTS_DIR = $(OUTPUT)/map_tests -$(MAP_TESTS_DIR): - mkdir -p $@ -MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h -MAP_TESTS_FILES := $(wildcard map_tests/*.c) -test_maps.c: $(MAP_TESTS_H) -$(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) -$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_H) $(MAP_TESTS_FILES) -$(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) - $(shell ( cd map_tests/; \ - echo '/* Generated header, do not edit */'; \ - echo '#ifdef DECLARE'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \ - echo '#endif'; \ - echo '#ifdef 
CALL'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@test_\1();@'; \ - echo '#endif' \ - ) > $(MAP_TESTS_H)) +$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ + $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ + | $(TRUNNER_BINARY)-extras + $$(call msg,BINARY,,$$@) + $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ + +endef + +# Define test_progs test runner. +TRUNNER_TESTS_DIR := prog_tests +TRUNNER_BPF_PROGS_DIR := progs +TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ + flow_dissector_load.h +TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ + $(wildcard progs/btf_dump_test_case_*.c) +TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_LDFLAGS := -mattr=+alu32 +$(eval $(call DEFINE_TEST_RUNNER,test_progs)) + +# Define test_progs-no_alu32 test runner. +TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE +TRUNNER_BPF_LDFLAGS := +$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) + +# Define test_progs BPF-GCC-flavored test runner. +ifneq ($(BPF_GCC),) +TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc) +TRUNNER_BPF_LDFLAGS := +$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc)) +endif -VERIFIER_TESTS_DIR = $(OUTPUT)/verifier -$(VERIFIER_TESTS_DIR): - mkdir -p $@ -VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h -VERIFIER_TEST_FILES := $(wildcard verifier/*.c) -test_verifier.c: $(VERIFIER_TESTS_H) -$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) -$(OUTPUT)/test_verifier: test_verifier.c $(VERIFIER_TESTS_H) -$(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) +# Define test_maps test runner. 
+TRUNNER_TESTS_DIR := map_tests +TRUNNER_BPF_PROGS_DIR := progs +TRUNNER_EXTRA_SOURCES := test_maps.c +TRUNNER_EXTRA_FILES := +TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built) +TRUNNER_BPF_CFLAGS := +TRUNNER_BPF_LDFLAGS := +$(eval $(call DEFINE_TEST_RUNNER,test_maps)) + +# Define test_verifier test runner. +# It is much simpler than test_maps/test_progs and sufficiently different from +# them (e.g., test.h is using completely pattern), that it's worth just +# explicitly defining all the rules explicitly. +verifier/tests.h: verifier/*.c $(shell ( cd verifier/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef FILL_ARRAY'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\(.*\)@#include \"\1\"@'; \ + ls *.c 2> /dev/null | sed -e 's@\(.*\)@#include \"\1\"@'; \ echo '#endif' \ - ) > $(VERIFIER_TESTS_H)) - -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) \ - $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \ - feature + ) > verifier/tests.h) +$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) + $(call msg,BINARY,,$@) + $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ + +# Make sure we are able to include and link libbpf against c++. 
+$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) + $(call msg,CXX,,$@) + $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ + +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \ + prog_tests/tests.h map_tests/tests.h verifier/tests.h \ + feature \ + $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc) diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h deleted file mode 100644 index 05f036df8a4c..000000000000 --- a/tools/testing/selftests/bpf/bpf_endian.h +++ /dev/null @@ -1,58 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BPF_ENDIAN__ -#define __BPF_ENDIAN__ - -#include <linux/stddef.h> -#include <linux/swab.h> - -/* LLVM's BPF target selects the endianness of the CPU - * it compiles on, or the user specifies (bpfel/bpfeb), - * respectively. The used __BYTE_ORDER__ is defined by - * the compiler, we cannot rely on __BYTE_ORDER from - * libc headers, since it doesn't reflect the actual - * requested byte order. - * - * Note, LLVM's BPF target has different __builtin_bswapX() - * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE - * in bpfel and bpfeb case, which means below, that we map - * to cpu_to_be16(). We could use it unconditionally in BPF - * case, but better not rely on it, so that this header here - * can be used from application and BPF program side, which - * use different targets. 
- */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -# define __bpf_ntohs(x) __builtin_bswap16(x) -# define __bpf_htons(x) __builtin_bswap16(x) -# define __bpf_constant_ntohs(x) ___constant_swab16(x) -# define __bpf_constant_htons(x) ___constant_swab16(x) -# define __bpf_ntohl(x) __builtin_bswap32(x) -# define __bpf_htonl(x) __builtin_bswap32(x) -# define __bpf_constant_ntohl(x) ___constant_swab32(x) -# define __bpf_constant_htonl(x) ___constant_swab32(x) -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -# define __bpf_ntohs(x) (x) -# define __bpf_htons(x) (x) -# define __bpf_constant_ntohs(x) (x) -# define __bpf_constant_htons(x) (x) -# define __bpf_ntohl(x) (x) -# define __bpf_htonl(x) (x) -# define __bpf_constant_ntohl(x) (x) -# define __bpf_constant_htonl(x) (x) -#else -# error "Fix your compiler's __BYTE_ORDER__?!" -#endif - -#define bpf_htons(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_htons(x) : __bpf_htons(x)) -#define bpf_ntohs(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_ntohs(x) : __bpf_ntohs(x)) -#define bpf_htonl(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_htonl(x) : __bpf_htonl(x)) -#define bpf_ntohl(x) \ - (__builtin_constant_p(x) ? \ - __bpf_constant_ntohl(x) : __bpf_ntohl(x)) - -#endif /* __BPF_ENDIAN__ */ diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h deleted file mode 100644 index f804f210244e..000000000000 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ /dev/null @@ -1,504 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BPF_HELPERS_H -#define __BPF_HELPERS_H - -/* helper macro to place programs, maps, license in - * different sections in elf_bpf file. Section names - * are interpreted by elf_bpf loader - */ -#define SEC(NAME) __attribute__((section(NAME), used)) - -#define __uint(name, val) int (*name)[val] -#define __type(name, val) val *name - -/* helper macro to print out debug messages */ -#define bpf_printk(fmt, ...) 
\ -({ \ - char ____fmt[] = fmt; \ - bpf_trace_printk(____fmt, sizeof(____fmt), \ - ##__VA_ARGS__); \ -}) - -/* helper functions called from eBPF programs written in C */ -static void *(*bpf_map_lookup_elem)(void *map, const void *key) = - (void *) BPF_FUNC_map_lookup_elem; -static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_update_elem; -static int (*bpf_map_delete_elem)(void *map, const void *key) = - (void *) BPF_FUNC_map_delete_elem; -static int (*bpf_map_push_elem)(void *map, const void *value, - unsigned long long flags) = - (void *) BPF_FUNC_map_push_elem; -static int (*bpf_map_pop_elem)(void *map, void *value) = - (void *) BPF_FUNC_map_pop_elem; -static int (*bpf_map_peek_elem)(void *map, void *value) = - (void *) BPF_FUNC_map_peek_elem; -static int (*bpf_probe_read)(void *dst, int size, const void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read; -static unsigned long long (*bpf_ktime_get_ns)(void) = - (void *) BPF_FUNC_ktime_get_ns; -static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) 
= - (void *) BPF_FUNC_trace_printk; -static void (*bpf_tail_call)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_tail_call; -static unsigned long long (*bpf_get_smp_processor_id)(void) = - (void *) BPF_FUNC_get_smp_processor_id; -static unsigned long long (*bpf_get_current_pid_tgid)(void) = - (void *) BPF_FUNC_get_current_pid_tgid; -static unsigned long long (*bpf_get_current_uid_gid)(void) = - (void *) BPF_FUNC_get_current_uid_gid; -static int (*bpf_get_current_comm)(void *buf, int buf_size) = - (void *) BPF_FUNC_get_current_comm; -static unsigned long long (*bpf_perf_event_read)(void *map, - unsigned long long flags) = - (void *) BPF_FUNC_perf_event_read; -static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = - (void *) BPF_FUNC_clone_redirect; -static int (*bpf_redirect)(int ifindex, int flags) = - (void *) BPF_FUNC_redirect; -static int (*bpf_redirect_map)(void *map, int key, int flags) = - (void *) BPF_FUNC_redirect_map; -static int (*bpf_perf_event_output)(void *ctx, void *map, - unsigned long long flags, void *data, - int size) = - (void *) BPF_FUNC_perf_event_output; -static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = - (void *) BPF_FUNC_get_stackid; -static int (*bpf_probe_write_user)(void *dst, const void *src, int size) = - (void *) BPF_FUNC_probe_write_user; -static int (*bpf_current_task_under_cgroup)(void *map, int index) = - (void *) BPF_FUNC_current_task_under_cgroup; -static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_get_tunnel_key; -static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = - (void *) BPF_FUNC_skb_set_tunnel_key; -static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_get_tunnel_opt; -static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = - (void *) BPF_FUNC_skb_set_tunnel_opt; -static unsigned long long (*bpf_get_prandom_u32)(void) = - (void *) 
BPF_FUNC_get_prandom_u32; -static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_head; -static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_meta; -static int (*bpf_get_socket_cookie)(void *ctx) = - (void *) BPF_FUNC_get_socket_cookie; -static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, - int optlen) = - (void *) BPF_FUNC_setsockopt; -static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval, - int optlen) = - (void *) BPF_FUNC_getsockopt; -static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) = - (void *) BPF_FUNC_sock_ops_cb_flags_set; -static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) = - (void *) BPF_FUNC_sk_redirect_map; -static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) = - (void *) BPF_FUNC_sk_redirect_hash; -static int (*bpf_sock_map_update)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_sock_map_update; -static int (*bpf_sock_hash_update)(void *map, void *key, void *value, - unsigned long long flags) = - (void *) BPF_FUNC_sock_hash_update; -static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags, - void *buf, unsigned int buf_size) = - (void *) BPF_FUNC_perf_event_read_value; -static int (*bpf_perf_prog_read_value)(void *ctx, void *buf, - unsigned int buf_size) = - (void *) BPF_FUNC_perf_prog_read_value; -static int (*bpf_override_return)(void *ctx, unsigned long rc) = - (void *) BPF_FUNC_override_return; -static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) = - (void *) BPF_FUNC_msg_redirect_map; -static int (*bpf_msg_redirect_hash)(void *ctx, - void *map, void *key, int flags) = - (void *) BPF_FUNC_msg_redirect_hash; -static int (*bpf_msg_apply_bytes)(void *ctx, int len) = - (void *) BPF_FUNC_msg_apply_bytes; -static int (*bpf_msg_cork_bytes)(void *ctx, int len) = - (void *) 
BPF_FUNC_msg_cork_bytes; -static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) = - (void *) BPF_FUNC_msg_pull_data; -static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) = - (void *) BPF_FUNC_msg_push_data; -static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) = - (void *) BPF_FUNC_msg_pop_data; -static int (*bpf_bind)(void *ctx, void *addr, int addr_len) = - (void *) BPF_FUNC_bind; -static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) = - (void *) BPF_FUNC_xdp_adjust_tail; -static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state, - int size, int flags) = - (void *) BPF_FUNC_skb_get_xfrm_state; -static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) = - (void *) BPF_FUNC_sk_select_reuseport; -static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) = - (void *) BPF_FUNC_get_stack; -static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, - int plen, __u32 flags) = - (void *) BPF_FUNC_fib_lookup; -static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr, - unsigned int len) = - (void *) BPF_FUNC_lwt_push_encap; -static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset, - void *from, unsigned int len) = - (void *) BPF_FUNC_lwt_seg6_store_bytes; -static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param, - unsigned int param_len) = - (void *) BPF_FUNC_lwt_seg6_action; -static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset, - unsigned int len) = - (void *) BPF_FUNC_lwt_seg6_adjust_srh; -static int (*bpf_rc_repeat)(void *ctx) = - (void *) BPF_FUNC_rc_repeat; -static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol, - unsigned long long scancode, unsigned int toggle) = - (void *) BPF_FUNC_rc_keydown; -static unsigned long long (*bpf_get_current_cgroup_id)(void) = - (void *) BPF_FUNC_get_current_cgroup_id; -static void *(*bpf_get_local_storage)(void *map, unsigned 
long long flags) = - (void *) BPF_FUNC_get_local_storage; -static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) = - (void *) BPF_FUNC_skb_cgroup_id; -static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) = - (void *) BPF_FUNC_skb_ancestor_cgroup_id; -static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, - struct bpf_sock_tuple *tuple, - int size, unsigned long long netns_id, - unsigned long long flags) = - (void *) BPF_FUNC_sk_lookup_tcp; -static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, - struct bpf_sock_tuple *tuple, - int size, unsigned long long netns_id, - unsigned long long flags) = - (void *) BPF_FUNC_skc_lookup_tcp; -static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, - struct bpf_sock_tuple *tuple, - int size, unsigned long long netns_id, - unsigned long long flags) = - (void *) BPF_FUNC_sk_lookup_udp; -static int (*bpf_sk_release)(struct bpf_sock *sk) = - (void *) BPF_FUNC_sk_release; -static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) = - (void *) BPF_FUNC_skb_vlan_push; -static int (*bpf_skb_vlan_pop)(void *ctx) = - (void *) BPF_FUNC_skb_vlan_pop; -static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) = - (void *) BPF_FUNC_rc_pointer_rel; -static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) = - (void *) BPF_FUNC_spin_lock; -static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = - (void *) BPF_FUNC_spin_unlock; -static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = - (void *) BPF_FUNC_sk_fullsock; -static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = - (void *) BPF_FUNC_tcp_sock; -static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = - (void *) BPF_FUNC_get_listener_sock; -static int (*bpf_skb_ecn_set_ce)(void *ctx) = - (void *) BPF_FUNC_skb_ecn_set_ce; -static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, - void *ip, int ip_len, void *tcp, int tcp_len) = - (void *) BPF_FUNC_tcp_check_syncookie; -static int 
(*bpf_sysctl_get_name)(void *ctx, char *buf, - unsigned long long buf_len, - unsigned long long flags) = - (void *) BPF_FUNC_sysctl_get_name; -static int (*bpf_sysctl_get_current_value)(void *ctx, char *buf, - unsigned long long buf_len) = - (void *) BPF_FUNC_sysctl_get_current_value; -static int (*bpf_sysctl_get_new_value)(void *ctx, char *buf, - unsigned long long buf_len) = - (void *) BPF_FUNC_sysctl_get_new_value; -static int (*bpf_sysctl_set_new_value)(void *ctx, const char *buf, - unsigned long long buf_len) = - (void *) BPF_FUNC_sysctl_set_new_value; -static int (*bpf_strtol)(const char *buf, unsigned long long buf_len, - unsigned long long flags, long *res) = - (void *) BPF_FUNC_strtol; -static int (*bpf_strtoul)(const char *buf, unsigned long long buf_len, - unsigned long long flags, unsigned long *res) = - (void *) BPF_FUNC_strtoul; -static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, - void *value, __u64 flags) = - (void *) BPF_FUNC_sk_storage_get; -static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = - (void *)BPF_FUNC_sk_storage_delete; -static int (*bpf_send_signal)(unsigned sig) = (void *)BPF_FUNC_send_signal; - -/* llvm builtin functions that eBPF C program may use to - * emit BPF_LD_ABS and BPF_LD_IND instructions - */ -struct sk_buff; -unsigned long long load_byte(void *skb, - unsigned long long off) asm("llvm.bpf.load.byte"); -unsigned long long load_half(void *skb, - unsigned long long off) asm("llvm.bpf.load.half"); -unsigned long long load_word(void *skb, - unsigned long long off) asm("llvm.bpf.load.word"); - -/* a helper structure used by eBPF C program - * to describe map attributes to elf_bpf loader - */ -struct bpf_map_def { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ - struct ____btf_map_##name { \ - 
type_key key; \ - type_val value; \ - }; \ - struct ____btf_map_##name \ - __attribute__ ((section(".maps." #name), used)) \ - ____btf_map_##name = { } - -static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = - (void *) BPF_FUNC_skb_load_bytes; -static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) = - (void *) BPF_FUNC_skb_load_bytes_relative; -static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) = - (void *) BPF_FUNC_skb_store_bytes; -static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l3_csum_replace; -static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = - (void *) BPF_FUNC_l4_csum_replace; -static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) = - (void *) BPF_FUNC_csum_diff; -static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_skb_under_cgroup; -static int (*bpf_skb_change_head)(void *, int len, int flags) = - (void *) BPF_FUNC_skb_change_head; -static int (*bpf_skb_pull_data)(void *, int len) = - (void *) BPF_FUNC_skb_pull_data; -static unsigned int (*bpf_get_cgroup_classid)(void *ctx) = - (void *) BPF_FUNC_get_cgroup_classid; -static unsigned int (*bpf_get_route_realm)(void *ctx) = - (void *) BPF_FUNC_get_route_realm; -static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) = - (void *) BPF_FUNC_skb_change_proto; -static int (*bpf_skb_change_type)(void *ctx, __u32 type) = - (void *) BPF_FUNC_skb_change_type; -static unsigned int (*bpf_get_hash_recalc)(void *ctx) = - (void *) BPF_FUNC_get_hash_recalc; -static unsigned long long (*bpf_get_current_task)(void) = - (void *) BPF_FUNC_get_current_task; -static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) = - (void *) BPF_FUNC_skb_change_tail; -static long long (*bpf_csum_update)(void *ctx, __u32 csum) = - (void *) 
BPF_FUNC_csum_update; -static void (*bpf_set_hash_invalid)(void *ctx) = - (void *) BPF_FUNC_set_hash_invalid; -static int (*bpf_get_numa_node_id)(void) = - (void *) BPF_FUNC_get_numa_node_id; -static int (*bpf_probe_read_str)(void *ctx, __u32 size, - const void *unsafe_ptr) = - (void *) BPF_FUNC_probe_read_str; -static unsigned int (*bpf_get_socket_uid)(void *ctx) = - (void *) BPF_FUNC_get_socket_uid; -static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) = - (void *) BPF_FUNC_set_hash; -static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode, - unsigned long long flags) = - (void *) BPF_FUNC_skb_adjust_room; - -/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ -#if defined(__TARGET_ARCH_x86) - #define bpf_target_x86 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_s390) - #define bpf_target_s390 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_arm) - #define bpf_target_arm - #define bpf_target_defined -#elif defined(__TARGET_ARCH_arm64) - #define bpf_target_arm64 - #define bpf_target_defined -#elif defined(__TARGET_ARCH_mips) - #define bpf_target_mips - #define bpf_target_defined -#elif defined(__TARGET_ARCH_powerpc) - #define bpf_target_powerpc - #define bpf_target_defined -#elif defined(__TARGET_ARCH_sparc) - #define bpf_target_sparc - #define bpf_target_defined -#else - #undef bpf_target_defined -#endif - -/* Fall back to what the compiler says */ -#ifndef bpf_target_defined -#if defined(__x86_64__) - #define bpf_target_x86 -#elif defined(__s390__) - #define bpf_target_s390 -#elif defined(__arm__) - #define bpf_target_arm -#elif defined(__aarch64__) - #define bpf_target_arm64 -#elif defined(__mips__) - #define bpf_target_mips -#elif defined(__powerpc__) - #define bpf_target_powerpc -#elif defined(__sparc__) - #define bpf_target_sparc -#endif -#endif - -#if defined(bpf_target_x86) - -#ifdef __KERNEL__ -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) 
((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) ((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) -#else -#ifdef __i386__ -/* i386 kernel is built with -mregparm=3 */ -#define PT_REGS_PARM1(x) ((x)->eax) -#define PT_REGS_PARM2(x) ((x)->edx) -#define PT_REGS_PARM3(x) ((x)->ecx) -#define PT_REGS_PARM4(x) 0 -#define PT_REGS_PARM5(x) 0 -#define PT_REGS_RET(x) ((x)->esp) -#define PT_REGS_FP(x) ((x)->ebp) -#define PT_REGS_RC(x) ((x)->eax) -#define PT_REGS_SP(x) ((x)->esp) -#define PT_REGS_IP(x) ((x)->eip) -#else -#define PT_REGS_PARM1(x) ((x)->rdi) -#define PT_REGS_PARM2(x) ((x)->rsi) -#define PT_REGS_PARM3(x) ((x)->rdx) -#define PT_REGS_PARM4(x) ((x)->rcx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->rsp) -#define PT_REGS_FP(x) ((x)->rbp) -#define PT_REGS_RC(x) ((x)->rax) -#define PT_REGS_SP(x) ((x)->rsp) -#define PT_REGS_IP(x) ((x)->rip) -#endif -#endif - -#elif defined(bpf_target_s390) - -/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_S390 const volatile user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) -#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) -#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) -#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) -#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) -#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) -#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) - -#elif defined(bpf_target_arm) - -#define PT_REGS_PARM1(x) ((x)->uregs[0]) -#define PT_REGS_PARM2(x) ((x)->uregs[1]) -#define PT_REGS_PARM3(x) ((x)->uregs[2]) 
-#define PT_REGS_PARM4(x) ((x)->uregs[3]) -#define PT_REGS_PARM5(x) ((x)->uregs[4]) -#define PT_REGS_RET(x) ((x)->uregs[14]) -#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->uregs[0]) -#define PT_REGS_SP(x) ((x)->uregs[13]) -#define PT_REGS_IP(x) ((x)->uregs[12]) - -#elif defined(bpf_target_arm64) - -/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_ARM64 const volatile struct user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) -#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) -#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) -#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) -#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) -#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) - -#elif defined(bpf_target_mips) - -#define PT_REGS_PARM1(x) ((x)->regs[4]) -#define PT_REGS_PARM2(x) ((x)->regs[5]) -#define PT_REGS_PARM3(x) ((x)->regs[6]) -#define PT_REGS_PARM4(x) ((x)->regs[7]) -#define PT_REGS_PARM5(x) ((x)->regs[8]) -#define PT_REGS_RET(x) ((x)->regs[31]) -#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[1]) -#define PT_REGS_SP(x) ((x)->regs[29]) -#define PT_REGS_IP(x) ((x)->cp0_epc) - -#elif defined(bpf_target_powerpc) - -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#elif defined(bpf_target_sparc) - 
-#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) - -/* Should this also be a bpf_target check for the sparc case? */ -#if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#else -#define PT_REGS_IP(x) ((x)->pc) -#endif - -#endif - -#if defined(bpf_target_powerpc) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_sparc) -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); }) -#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP -#else -#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) -#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \ - bpf_probe_read(&(ip), sizeof(ip), \ - (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) -#endif - -#endif diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h new file mode 100644 index 000000000000..6f8988738bc1 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_LEGACY__ +#define __BPF_LEGACY__ + +/* + * legacy bpf_map_def with extra fields supported only by bpf_load(), do not + * use outside of samples/bpf + */ +struct bpf_map_def_legacy { + unsigned int type; + unsigned int key_size; + unsigned int value_size; + unsigned int max_entries; + unsigned int map_flags; + unsigned int inner_map_idx; + unsigned int numa_node; +}; + +#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ + struct ____btf_map_##name { \ + type_key key; \ + type_val value; \ + }; \ + struct ____btf_map_##name 
\ + __attribute__ ((section(".maps." #name), used)) \ + ____btf_map_##name = { } + +/* llvm builtin functions that eBPF C program may use to + * emit BPF_LD_ABS and BPF_LD_IND instructions + */ +unsigned long long load_byte(void *skb, + unsigned long long off) asm("llvm.bpf.load.byte"); +unsigned long long load_half(void *skb, + unsigned long long off) asm("llvm.bpf.load.half"); +unsigned long long load_word(void *skb, + unsigned long long off) asm("llvm.bpf.load.word"); + +#endif + diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h new file mode 100644 index 000000000000..8f21965ffc6c --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_TCP_HELPERS_H +#define __BPF_TCP_HELPERS_H + +#include <stdbool.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_trace_helpers.h" + +#define BPF_STRUCT_OPS(name, args...) 
\ +SEC("struct_ops/"#name) \ +BPF_PROG(name, args) + +#define tcp_jiffies32 ((__u32)bpf_jiffies64()) + +struct sock_common { + unsigned char skc_state; +} __attribute__((preserve_access_index)); + +enum sk_pacing { + SK_PACING_NONE = 0, + SK_PACING_NEEDED = 1, + SK_PACING_FQ = 2, +}; + +struct sock { + struct sock_common __sk_common; + unsigned long sk_pacing_rate; + __u32 sk_pacing_status; /* see enum sk_pacing */ +} __attribute__((preserve_access_index)); + +struct inet_sock { + struct sock sk; +} __attribute__((preserve_access_index)); + +struct inet_connection_sock { + struct inet_sock icsk_inet; + __u8 icsk_ca_state:6, + icsk_ca_setsockopt:1, + icsk_ca_dst_locked:1; + struct { + __u8 pending; + } icsk_ack; + __u64 icsk_ca_priv[104 / sizeof(__u64)]; +} __attribute__((preserve_access_index)); + +struct tcp_sock { + struct inet_connection_sock inet_conn; + + __u32 rcv_nxt; + __u32 snd_nxt; + __u32 snd_una; + __u8 ecn_flags; + __u32 delivered; + __u32 delivered_ce; + __u32 snd_cwnd; + __u32 snd_cwnd_cnt; + __u32 snd_cwnd_clamp; + __u32 snd_ssthresh; + __u8 syn_data:1, /* SYN includes data */ + syn_fastopen:1, /* SYN includes Fast Open option */ + syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ + syn_fastopen_ch:1, /* Active TFO re-enabling probe */ + syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ + save_syn:1, /* Save headers of SYN packet */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? 
*/ + syn_smc:1; /* SYN includes SMC */ + __u32 max_packets_out; + __u32 lsndtime; + __u32 prior_cwnd; + __u64 tcp_mstamp; /* most recent packet received/sent */ +} __attribute__((preserve_access_index)); + +static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +static __always_inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + +static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +static __always_inline bool before(__u32 seq1, __u32 seq2) +{ + return (__s32)(seq1-seq2) < 0; +} +#define after(seq2, seq1) before(seq1, seq2) + +#define TCP_ECN_OK 1 +#define TCP_ECN_QUEUE_CWR 2 +#define TCP_ECN_DEMAND_CWR 4 +#define TCP_ECN_SEEN 8 + +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4, + ICSK_ACK_PUSHED2 = 8, + ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */ +}; + +enum tcp_ca_event { + CA_EVENT_TX_START = 0, + CA_EVENT_CWND_RESTART = 1, + CA_EVENT_COMPLETE_CWR = 2, + CA_EVENT_LOSS = 3, + CA_EVENT_ECN_NO_CE = 4, + CA_EVENT_ECN_IS_CE = 5, +}; + +enum tcp_ca_state { + TCP_CA_Open = 0, + TCP_CA_Disorder = 1, + TCP_CA_CWR = 2, + TCP_CA_Recovery = 3, + TCP_CA_Loss = 4 +}; + +struct ack_sample { + __u32 pkts_acked; + __s32 rtt_us; + __u32 in_flight; +} __attribute__((preserve_access_index)); + +struct rate_sample { + __u64 prior_mstamp; /* starting timestamp for interval */ + __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + __s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + __u32 snd_interval_us; /* snd interval for delivered packets */ + __u32 rcv_interval_us; /* rcv interval for delivered packets */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + __u32 acked_sacked; 
/* number of packets newly (S)ACKed upon ACK */ + __u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ + bool is_retrans; /* is sample from retransmission? */ + bool is_ack_delayed; /* is this (likely) a delayed ACK? */ +} __attribute__((preserve_access_index)); + +#define TCP_CA_NAME_MAX 16 +#define TCP_CONG_NEEDS_ECN 0x2 + +struct tcp_congestion_ops { + char name[TCP_CA_NAME_MAX]; + __u32 flags; + + /* initialize private data (optional) */ + void (*init)(struct sock *sk); + /* cleanup private data (optional) */ + void (*release)(struct sock *sk); + + /* return slow start threshold (required) */ + __u32 (*ssthresh)(struct sock *sk); + /* do new cwnd calculation (required) */ + void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked); + /* call before changing ca_state (optional) */ + void (*set_state)(struct sock *sk, __u8 new_state); + /* call when cwnd event occurs (optional) */ + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); + /* call when ack arrives (optional) */ + void (*in_ack_event)(struct sock *sk, __u32 flags); + /* new value of cwnd after loss (required) */ + __u32 (*undo_cwnd)(struct sock *sk); + /* hook for packet ack accounting (optional) */ + void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* override sysctl_tcp_min_tso_segs */ + __u32 (*min_tso_segs)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + __u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); +}; + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? 
__x : min(__x, __y)); }) + +static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) +{ + __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh); + + acked -= cwnd - tp->snd_cwnd; + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); + + return acked; +} + +static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp) +{ + return tp->snd_cwnd < tp->snd_ssthresh; +} + +static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + /* If in slow start, ensure cwnd grows to twice what was ACKed. */ + if (tcp_in_slow_start(tp)) + return tp->snd_cwnd < 2 * tp->max_packets_out; + + return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited); +} + +static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) +{ + /* If credits accumulated at a higher w, apply them gently now. */ + if (tp->snd_cwnd_cnt >= w) { + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd++; + } + + tp->snd_cwnd_cnt += acked; + if (tp->snd_cwnd_cnt >= w) { + __u32 delta = tp->snd_cwnd_cnt / w; + + tp->snd_cwnd_cnt -= delta * w; + tp->snd_cwnd += delta; + } + tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp); +} + +#endif diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h new file mode 100644 index 000000000000..c6f1354d93fb --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_trace_helpers.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACE_HELPERS_H +#define __BPF_TRACE_HELPERS_H + +#include <bpf/bpf_helpers.h> + +#define ___bpf_concat(a, b) a ## b +#define ___bpf_apply(fn, n) ___bpf_concat(fn, n) +#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N +#define ___bpf_narg(...) \ + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_empty(...) 
\ + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___bpf_ctx_cast0() ctx +#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] +#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] +#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] +#define ___bpf_ctx_cast(args...) \ + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) + +/* + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and + * similar kinds of BPF programs, that accept input arguments as a single + * pointer to untyped u64 array, where each u64 can actually be a typed + * pointer or integer of different size. Instead of requiring user to write + * manual casts and work with array elements by index, BPF_PROG macro + * allows user to declare a list of named and typed input arguments in the + * same syntax as for normal C function. All the casting is hidden and + * performed transparently, while user code can just assume working with + * function arguments of specified type and name. + * + * Original raw context argument is preserved as well as 'ctx' argument. + * This is useful when using BPF helpers that expect original context + * as one of the parameters (e.g., for bpf_perf_event_output()).
+ */ +#define BPF_PROG(name, args...) \ +name(unsigned long long *ctx); \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args); \ +typeof(name(0)) name(unsigned long long *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_ctx_cast(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) \ +____##name(unsigned long long *ctx, ##args) + +struct pt_regs; + +#define ___bpf_kprobe_args0() ctx +#define ___bpf_kprobe_args1(x) \ + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) \ + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) \ + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) \ + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) \ + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...) \ + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) + +/* + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific + * low-level way of getting kprobe input arguments from struct pt_regs, and + * provides a familiar typed and named function arguments syntax and + * semantics of accessing kprobe input parameters. + * + * Original struct pt_regs* context is preserved as 'ctx' argument. This might + * be necessary when using BPF helpers like bpf_perf_event_output(). + */ +#define BPF_KPROBE(name, args...) 
\ +name(struct pt_regs *ctx); \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) + +#define ___bpf_kretprobe_args0() ctx +#define ___bpf_kretprobe_argsN(x, args...) \ + ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) +#define ___bpf_kretprobe_args(args...) \ + ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) + +/* + * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all + * input kprobe arguments, one last extra argument has to be specified, which + * captures kprobe return value. + */ +#define BPF_KRETPROBE(name, args...) \ +name(struct pt_regs *ctx); \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ +typeof(name(0)) name(struct pt_regs *ctx) \ +{ \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + return ____##name(___bpf_kretprobe_args(args)); \ + _Pragma("GCC diagnostic pop") \ +} \ +static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) +#endif diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index ec219f84e041..a3352a64c067 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,7 +6,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> -#include <libbpf.h> /* libbpf_num_possible_cpus */ +#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ static inline unsigned int bpf_num_possible_cpus(void) { diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e95c33e333a4..0fb910df5387 100644 --- 
a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -41,7 +41,7 @@ * * If successful, 0 is returned. */ -int enable_all_controllers(char *cgroup_path) +static int enable_all_controllers(char *cgroup_path) { char path[PATH_MAX + 1]; char buf[PATH_MAX]; @@ -98,7 +98,7 @@ int enable_all_controllers(char *cgroup_path) */ int setup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; + char cgroup_workdir[PATH_MAX - 24]; format_cgroup_path(cgroup_workdir, ""); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f7a0744db31e..5dc109f4c097 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -34,3 +34,4 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_IPV6_SIT=m +CONFIG_BPF_JIT=y diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c new file mode 100644 index 000000000000..f0a64d8ac59a --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +static void map_batch_update(int map_fd, __u32 max_entries, int *keys, + int *values) +{ + int i, err; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + for (i = 0; i < max_entries; i++) { + keys[i] = i; + values[i] = i + 1; + } + + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); +} + +static void map_batch_verify(int *visited, __u32 max_entries, + int *keys, int *values) +{ + int i; + + memset(visited, 0, max_entries * sizeof(*visited)); + for (i = 0; i < max_entries; i++) { + CHECK(keys[i] + 1 != values[i], "key/value checking", + "error: i %d 
key %d value %d\n", i, keys[i], values[i]); + visited[i] = 1; + } + for (i = 0; i < max_entries; i++) { + CHECK(visited[i] != 1, "visited checking", + "error: keys array at index %d missing\n", i); + } +} + +void test_array_map_batch_ops(void) +{ + struct bpf_create_map_attr xattr = { + .name = "array_map", + .map_type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + }; + int map_fd, *keys, *values, *visited; + __u32 count, total, total_success; + const __u32 max_entries = 10; + bool nospace_err; + __u64 batch = 0; + int err, step; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + + keys = malloc(max_entries * sizeof(int)); + values = malloc(max_entries * sizeof(int)); + visited = malloc(max_entries * sizeof(int)); + CHECK(!keys || !values || !visited, "malloc()", "error:%s\n", + strerror(errno)); + + /* populate elements to the map */ + map_batch_update(map_fd, max_entries, keys, values); + + /* test 1: lookup in a loop with various steps. */ + total_success = 0; + for (step = 1; step < max_entries; step++) { + map_batch_update(map_fd, max_entries, keys, values); + map_batch_verify(visited, max_entries, keys, values); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * sizeof(*values)); + batch = 0; + total = 0; + /* iteratively lookup/delete elements with 'step' + * elements each. + */ + count = step; + nospace_err = false; + while (true) { + err = bpf_map_lookup_batch(map_fd, + total ? 
&batch : NULL, &batch, + keys + total, + values + total, + &count, &opts); + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + + } + + if (nospace_err == true) + continue; + + CHECK(total != max_entries, "lookup with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + map_batch_verify(visited, max_entries, keys, values); + + total_success++; + } + + CHECK(total_success == 0, "check total_success", + "unexpected failure\n"); + + printf("%s:PASS\n", __func__); + + free(keys); + free(values); + free(visited); +} diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c new file mode 100644 index 000000000000..976bf415fbdd --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <stdio.h> +#include <errno.h> +#include <string.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <bpf_util.h> +#include <test_maps.h> + +static void map_batch_update(int map_fd, __u32 max_entries, int *keys, + void *values, bool is_pcpu) +{ + typedef BPF_DECLARE_PERCPU(int, value); + value *v = NULL; + int i, j, err; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + if (is_pcpu) + v = (value *)values; + + for (i = 0; i < max_entries; i++) { + keys[i] = i + 1; + if (is_pcpu) + for (j = 0; j < bpf_num_possible_cpus(); j++) + bpf_percpu(v[i], j) = i + 2 + j; + else + ((int *)values)[i] = i + 2; + } + + err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts); + CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno)); +} + +static void map_batch_verify(int *visited, __u32 max_entries, + int *keys, void *values, bool is_pcpu) +{ + typedef BPF_DECLARE_PERCPU(int, value); + value *v = NULL; + int i, j; + + if (is_pcpu) + v = (value 
*)values; + + memset(visited, 0, max_entries * sizeof(*visited)); + for (i = 0; i < max_entries; i++) { + + if (is_pcpu) { + for (j = 0; j < bpf_num_possible_cpus(); j++) { + CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j), + "key/value checking", + "error: i %d j %d key %d value %d\n", + i, j, keys[i], bpf_percpu(v[i], j)); + } + } else { + CHECK(keys[i] + 1 != ((int *)values)[i], + "key/value checking", + "error: i %d key %d value %d\n", i, keys[i], + ((int *)values)[i]); + } + + visited[i] = 1; + + } + for (i = 0; i < max_entries; i++) { + CHECK(visited[i] != 1, "visited checking", + "error: keys array at index %d missing\n", i); + } +} + +void __test_map_lookup_and_delete_batch(bool is_pcpu) +{ + __u32 batch, count, total, total_success; + typedef BPF_DECLARE_PERCPU(int, value); + int map_fd, *keys, *visited, key; + const __u32 max_entries = 10; + value pcpu_values[max_entries]; + int err, step, value_size; + bool nospace_err; + void *values; + struct bpf_create_map_attr xattr = { + .name = "hash_map", + .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : + BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(int), + }; + DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts, + .elem_flags = 0, + .flags = 0, + ); + + xattr.max_entries = max_entries; + map_fd = bpf_create_map_xattr(&xattr); + CHECK(map_fd == -1, + "bpf_create_map_xattr()", "error:%s\n", strerror(errno)); + + value_size = is_pcpu ? 
sizeof(value) : sizeof(int); + keys = malloc(max_entries * sizeof(int)); + if (is_pcpu) + values = pcpu_values; + else + values = malloc(max_entries * sizeof(int)); + visited = malloc(max_entries * sizeof(int)); + CHECK(!keys || !values || !visited, "malloc()", + "error:%s\n", strerror(errno)); + + /* test 1: lookup/delete an empty hash table, -ENOENT */ + count = max_entries; + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, + values, &count, &opts); + CHECK((err && errno != ENOENT), "empty map", + "error: %s\n", strerror(errno)); + + /* populate elements to the map */ + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + + /* test 2: lookup/delete with count = 0, success */ + count = 0; + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, + values, &count, &opts); + CHECK(err, "count = 0", "error: %s\n", strerror(errno)); + + /* test 3: lookup/delete with count = max_entries, success */ + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * value_size); + count = max_entries; + err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys, + values, &count, &opts); + CHECK((err && errno != ENOENT), "count = max_entries", + "error: %s\n", strerror(errno)); + CHECK(count != max_entries, "count = max_entries", + "count = %u, max_entries = %u\n", count, max_entries); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); + + /* bpf_map_get_next_key() should return -ENOENT for an empty map. */ + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", strerror(errno)); + + /* test 4: lookup/delete in a loop with various steps. 
*/ + total_success = 0; + for (step = 1; step < max_entries; step++) { + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * value_size); + total = 0; + /* iteratively lookup/delete elements with 'step' + * elements each + */ + count = step; + nospace_err = false; + while (true) { + err = bpf_map_lookup_batch(map_fd, + total ? &batch : NULL, + &batch, keys + total, + values + + total * value_size, + &count, &opts); + /* It is possible that we are failing due to buffer size + * not big enough. In such cases, let us just exit and + * go with large steps. Note that a buffer size with + * max_entries should always work. + */ + if (err && errno == ENOSPC) { + nospace_err = true; + break; + } + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + + } + if (nospace_err == true) + continue; + + CHECK(total != max_entries, "lookup with steps", + "total = %u, max_entries = %u\n", total, max_entries); + map_batch_verify(visited, max_entries, keys, values, is_pcpu); + + total = 0; + count = step; + while (total < max_entries) { + if (max_entries - total < step) + count = max_entries - total; + err = bpf_map_delete_batch(map_fd, + keys + total, + &count, &opts); + CHECK((err && errno != ENOENT), "delete batch", + "error: %s\n", strerror(errno)); + total += count; + if (err) + break; + } + CHECK(total != max_entries, "delete with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + /* check map is empty, errno == ENOENT */ + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()", + "error: %s\n", strerror(errno)); + + /* iteratively lookup/delete elements with 'step' + * elements each + */ + map_batch_update(map_fd, max_entries, keys, values, is_pcpu); + memset(keys, 0, max_entries * sizeof(*keys)); + memset(values, 0, max_entries * value_size);
+ total = 0; + count = step; + nospace_err = false; + while (true) { + err = bpf_map_lookup_and_delete_batch(map_fd, + total ? &batch : NULL, + &batch, keys + total, + values + + total * value_size, + &count, &opts); + /* It is possible that we are failing due to buffer size + * not big enough. In such cases, let us just exit and + * go with large steps. Note that a buffer size with + * max_entries should always work. + */ + if (err && errno == ENOSPC) { + nospace_err = true; + break; + } + + CHECK((err && errno != ENOENT), "lookup with steps", + "error: %s\n", strerror(errno)); + + total += count; + if (err) + break; + } + + if (nospace_err == true) + continue; + + CHECK(total != max_entries, "lookup/delete with steps", + "total = %u, max_entries = %u\n", total, max_entries); + + map_batch_verify(visited, max_entries, keys, values, is_pcpu); + err = bpf_map_get_next_key(map_fd, NULL, &key); + CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", + strerror(errno)); + + total_success++; + } + + CHECK(total_success == 0, "check total_success", + "unexpected failure\n"); + free(keys); + free(visited); + if (!is_pcpu) + free(values); +} + +void htab_map_batch_ops(void) +{ + __test_map_lookup_and_delete_batch(false); + printf("test_%s:PASS\n", __func__); +} + +void htab_percpu_map_batch_ops(void) +{ + __test_map_lookup_and_delete_batch(true); + printf("test_%s:PASS\n", __func__); +} + +void test_htab_map_batch_ops(void) +{ + htab_map_batch_ops(); + htab_percpu_map_batch_ops(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 5ecc267d98b0..a0ee87c8e1ea 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_attach_probe.skel.h" ssize_t get_base_addr() { - size_t start; + size_t start, offset; char buf[256]; FILE *f; @@ -10,10
+11,11 @@ ssize_t get_base_addr() { if (!f) return -errno; - while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) { + while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", + &start, buf, &offset) == 3) { if (strcmp(buf, "r-xp") == 0) { fclose(f); - return start; + return start - offset; } } @@ -23,22 +25,10 @@ ssize_t get_base_addr() { void test_attach_probe(void) { - const char *kprobe_name = "kprobe/sys_nanosleep"; - const char *kretprobe_name = "kretprobe/sys_nanosleep"; - const char *uprobe_name = "uprobe/trigger_func"; - const char *uretprobe_name = "uretprobe/trigger_func"; - const int kprobe_idx = 0, kretprobe_idx = 1; - const int uprobe_idx = 2, uretprobe_idx = 3; - const char *file = "./test_attach_probe.o"; - struct bpf_program *kprobe_prog, *kretprobe_prog; - struct bpf_program *uprobe_prog, *uretprobe_prog; - struct bpf_object *obj; - int err, prog_fd, duration = 0, res; - struct bpf_link *kprobe_link = NULL; - struct bpf_link *kretprobe_link = NULL; - struct bpf_link *uprobe_link = NULL; - struct bpf_link *uretprobe_link = NULL; - int results_map_fd; + int duration = 0; + struct bpf_link *kprobe_link, *kretprobe_link; + struct bpf_link *uprobe_link, *uretprobe_link; + struct test_attach_probe* skel; size_t uprobe_offset; ssize_t base_addr; @@ -48,113 +38,68 @@ void test_attach_probe(void) return; uprobe_offset = (size_t)&get_base_addr - base_addr; - /* load programs */ - err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + skel = test_attach_probe__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; - - kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' not found\n", kprobe_name)) - goto cleanup; - kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name); - if (CHECK(!kretprobe_prog, "find_probe", - "prog '%s' not found\n", kretprobe_name)) - goto cleanup; - 
uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name); - if (CHECK(!uprobe_prog, "find_probe", - "prog '%s' not found\n", uprobe_name)) - goto cleanup; - uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name); - if (CHECK(!uretprobe_prog, "find_probe", - "prog '%s' not found\n", uretprobe_name)) - goto cleanup; - - /* load maps */ - results_map_fd = bpf_find_map(__func__, obj, "results_map"); - if (CHECK(results_map_fd < 0, "find_results_map", - "err %d\n", results_map_fd)) + if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n")) goto cleanup; - kprobe_link = bpf_program__attach_kprobe(kprobe_prog, + kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, false /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", - "err %ld\n", PTR_ERR(kprobe_link))) { - kprobe_link = NULL; + "err %ld\n", PTR_ERR(kprobe_link))) goto cleanup; - } - kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog, + skel->links.handle_kprobe = kprobe_link; + + kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, true /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe", - "err %ld\n", PTR_ERR(kretprobe_link))) { - kretprobe_link = NULL; + "err %ld\n", PTR_ERR(kretprobe_link))) goto cleanup; - } - uprobe_link = bpf_program__attach_uprobe(uprobe_prog, + skel->links.handle_kretprobe = kretprobe_link; + + uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe, false /* retprobe */, 0 /* self pid */, "/proc/self/exe", uprobe_offset); if (CHECK(IS_ERR(uprobe_link), "attach_uprobe", - "err %ld\n", PTR_ERR(uprobe_link))) { - uprobe_link = NULL; + "err %ld\n", PTR_ERR(uprobe_link))) goto cleanup; - } - uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog, + skel->links.handle_uprobe = uprobe_link; + + uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe, true /* retprobe */, -1 /* any pid */, "/proc/self/exe", 
uprobe_offset); if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe", - "err %ld\n", PTR_ERR(uretprobe_link))) { - uretprobe_link = NULL; + "err %ld\n", PTR_ERR(uretprobe_link))) goto cleanup; - } + skel->links.handle_uretprobe = uretprobe_link; /* trigger & validate kprobe && kretprobe */ usleep(1); - err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res); - if (CHECK(err, "get_kprobe_res", - "failed to get kprobe res: %d\n", err)) + if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res", + "wrong kprobe res: %d\n", skel->bss->kprobe_res)) goto cleanup; - if (CHECK(res != kprobe_idx + 1, "check_kprobe_res", - "wrong kprobe res: %d\n", res)) - goto cleanup; - - err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res); - if (CHECK(err, "get_kretprobe_res", - "failed to get kretprobe res: %d\n", err)) - goto cleanup; - if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res", - "wrong kretprobe res: %d\n", res)) + if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res", + "wrong kretprobe res: %d\n", skel->bss->kretprobe_res)) goto cleanup; /* trigger & validate uprobe & uretprobe */ get_base_addr(); - err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res); - if (CHECK(err, "get_uprobe_res", - "failed to get uprobe res: %d\n", err)) - goto cleanup; - if (CHECK(res != uprobe_idx + 1, "check_uprobe_res", - "wrong uprobe res: %d\n", res)) - goto cleanup; - - err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res); - if (CHECK(err, "get_uretprobe_res", - "failed to get uretprobe res: %d\n", err)) + if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", + "wrong uprobe res: %d\n", skel->bss->uprobe_res)) goto cleanup; - if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res", - "wrong uretprobe res: %d\n", res)) + if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res", + "wrong uretprobe res: %d\n", skel->bss->uretprobe_res)) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); - bpf_link__destroy(kretprobe_link); - 
bpf_link__destroy(uprobe_link); - bpf_link__destroy(uretprobe_link); - bpf_object__close(obj); + test_attach_probe__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c index cb827383db4d..f10029821e16 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c @@ -48,16 +48,17 @@ void test_bpf_obj_id(void) /* test_obj_id.o is a dumb prog. It should never fail * to load. */ - if (err) - error_cnt++; - assert(!err); + if (CHECK_FAIL(err)) + continue; /* Insert a magic value to the map */ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); - assert(map_fds[i] >= 0); + if (CHECK_FAIL(map_fds[i] < 0)) + goto done; err = bpf_map_update_elem(map_fds[i], &array_key, &array_magic_value, 0); - assert(!err); + if (CHECK_FAIL(err)) + goto done; /* Check getting map info */ info_len = sizeof(struct bpf_map_info) * 2; @@ -96,9 +97,11 @@ void test_bpf_obj_id(void) prog_infos[i].map_ids = ptr_to_u64(map_ids + i); prog_infos[i].nr_map_ids = 2; err = clock_gettime(CLOCK_REALTIME, &real_time_ts); - assert(!err); + if (CHECK_FAIL(err)) + goto done; err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts); - assert(!err); + if (CHECK_FAIL(err)) + goto done; err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i], &info_len); load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec) @@ -106,8 +109,8 @@ void test_bpf_obj_id(void) if (CHECK(err || prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || - (jit_enabled && !prog_infos[i].jited_prog_len) || - (jit_enabled && + (env.jit_enabled && !prog_infos[i].jited_prog_len) || + (env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))) || !prog_infos[i].xlated_prog_len || !memcmp(xlated_insns, zeros, sizeof(zeros)) || @@ -121,7 +124,7 @@ void test_bpf_obj_id(void) err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, 
sizeof(struct bpf_prog_info), - jit_enabled, + env.jit_enabled, prog_infos[i].jited_prog_len, prog_infos[i].xlated_prog_len, !!memcmp(jited_insns, zeros, sizeof(zeros)), @@ -224,7 +227,8 @@ void test_bpf_obj_id(void) nr_id_found++; err = bpf_map_lookup_elem(map_fd, &array_key, &array_value); - assert(!err); + if (CHECK_FAIL(err)) + goto done; err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); CHECK(err || info_len != sizeof(struct bpf_map_info) || diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c new file mode 100644 index 000000000000..8482bbc67eec --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <linux/err.h> +#include <test_progs.h> +#include "bpf_dctcp.skel.h" +#include "bpf_cubic.skel.h" + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static const unsigned int total_bytes = 10 * 1024 * 1024; +static const struct timeval timeo_sec = { .tv_sec = 10 }; +static const size_t timeo_optlen = sizeof(timeo_sec); +static int stop, duration; + +static int settimeo(int fd) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, + timeo_optlen); + if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n", + errno)) + return -1; + + err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec, + timeo_optlen); + if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n", + errno)) + return -1; + + return 0; +} + +static int settcpca(int fd, const char *tcp_ca) +{ + int err; + + err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca)); + if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n", + errno)) + return -1; + + return 0; +} + +static void *server(void *arg) +{ + int lfd = (int)(long)arg, err = 0, fd; + ssize_t nr_sent = 0, bytes = 0; + char batch[1500]; + + fd = accept(lfd, NULL, NULL); + while (fd == -1) { + 
if (errno == EINTR) + continue; + err = -errno; + goto done; + } + + if (settimeo(fd)) { + err = -errno; + goto done; + } + + while (bytes < total_bytes && !READ_ONCE(stop)) { + nr_sent = send(fd, &batch, + min(total_bytes - bytes, sizeof(batch)), 0); + if (nr_sent == -1 && errno == EINTR) + continue; + if (nr_sent == -1) { + err = -errno; + break; + } + bytes += nr_sent; + } + + CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n", + bytes, total_bytes, nr_sent, errno); + +done: + if (fd != -1) + close(fd); + if (err) { + WRITE_ONCE(stop, 1); + return ERR_PTR(err); + } + return NULL; +} + +static void do_test(const char *tcp_ca) +{ + struct sockaddr_in6 sa6 = {}; + ssize_t nr_recv = 0, bytes = 0; + int lfd = -1, fd = -1; + pthread_t srv_thread; + socklen_t addrlen = sizeof(sa6); + void *thread_ret; + char batch[1500]; + int err; + + WRITE_ONCE(stop, 0); + + lfd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(lfd == -1, "socket", "errno:%d\n", errno)) + return; + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) { + close(lfd); + return; + } + + if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) || + settimeo(lfd) || settimeo(fd)) + goto done; + + /* bind, listen and start server thread to accept */ + sa6.sin6_family = AF_INET6; + sa6.sin6_addr = in6addr_loopback; + err = bind(lfd, (struct sockaddr *)&sa6, addrlen); + if (CHECK(err == -1, "bind", "errno:%d\n", errno)) + goto done; + err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen); + if (CHECK(err == -1, "getsockname", "errno:%d\n", errno)) + goto done; + err = listen(lfd, 1); + if (CHECK(err == -1, "listen", "errno:%d\n", errno)) + goto done; + err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd); + if (CHECK(err != 0, "pthread_create", "err:%d\n", err)) + goto done; + + /* connect to server */ + err = connect(fd, (struct sockaddr *)&sa6, addrlen); + if (CHECK(err == -1, "connect", "errno:%d\n", errno)) + goto wait_thread; + + /* 
recv total_bytes */ + while (bytes < total_bytes && !READ_ONCE(stop)) { + nr_recv = recv(fd, &batch, + min(total_bytes - bytes, sizeof(batch)), 0); + if (nr_recv == -1 && errno == EINTR) + continue; + if (nr_recv == -1) + break; + bytes += nr_recv; + } + + CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n", + bytes, total_bytes, nr_recv, errno); + +wait_thread: + WRITE_ONCE(stop, 1); + pthread_join(srv_thread, &thread_ret); + CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld", + PTR_ERR(thread_ret)); +done: + close(lfd); + close(fd); +} + +static void test_cubic(void) +{ + struct bpf_cubic *cubic_skel; + struct bpf_link *link; + + cubic_skel = bpf_cubic__open_and_load(); + if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n")) + return; + + link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); + if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", + PTR_ERR(link))) { + bpf_cubic__destroy(cubic_skel); + return; + } + + do_test("bpf_cubic"); + + bpf_link__destroy(link); + bpf_cubic__destroy(cubic_skel); +} + +static void test_dctcp(void) +{ + struct bpf_dctcp *dctcp_skel; + struct bpf_link *link; + + dctcp_skel = bpf_dctcp__open_and_load(); + if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n")) + return; + + link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); + if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n", + PTR_ERR(link))) { + bpf_dctcp__destroy(dctcp_skel); + return; + } + + do_test("bpf_dctcp"); + + bpf_link__destroy(link); + bpf_dctcp__destroy(dctcp_skel); +} + +void test_bpf_tcp_ca(void) +{ + if (test__start_subtest("dctcp")) + test_dctcp(); + if (test__start_subtest("cubic")) + test_cubic(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e1b55261526f..e9f2f12ba06b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ 
b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -4,14 +4,19 @@ static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { - if (level != LIBBPF_DEBUG) - return vfprintf(stderr, format, args); + if (level != LIBBPF_DEBUG) { + vprintf(format, args); + return 0; + } if (!strstr(format, "verifier log")) return 0; - return vfprintf(stderr, "%s", args); + vprintf("%s", args); + return 0; } +extern int extra_prog_load_log_flags; + static int check_load(const char *file, enum bpf_prog_type type) { struct bpf_prog_load_attr attr; @@ -21,23 +26,34 @@ static int check_load(const char *file, enum bpf_prog_type type) memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); attr.file = file; attr.prog_type = type; - attr.log_level = 4; + attr.log_level = 4 | extra_prog_load_log_flags; attr.prog_flags = BPF_F_TEST_RND_HI32; err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); bpf_object__close(obj); - if (err) - error_cnt++; return err; } +struct scale_test_def { + const char *file; + enum bpf_prog_type attach_type; + bool fails; +}; + void test_bpf_verif_scale(void) { - const char *sched_cls[] = { - "./test_verif_scale1.o", "./test_verif_scale2.o", "./test_verif_scale3.o", - }; - const char *raw_tp[] = { + struct scale_test_def tests[] = { + { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ }, + + { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS }, + { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS }, + { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, + + { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + /* full unroll by llvm */ - "./pyperf50.o", "./pyperf100.o", "./pyperf180.o", + { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* partial unroll. llvm will unroll loop ~150 times. * C loop count -> 600. @@ -45,7 +61,7 @@ void test_bpf_verif_scale(void) * 16k insns in loop body. * Total of 5 such loops. 
Total program size ~82k insns. */ - "./pyperf600.o", + { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* no unroll at all. * C loop count -> 600. @@ -53,48 +69,47 @@ void test_bpf_verif_scale(void) * ~110 insns in loop body. * Total of 5 such loops. Total program size ~1500 insns. */ - "./pyperf600_nounroll.o", + { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - "./loop1.o", "./loop2.o", + { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "loop4.o", BPF_PROG_TYPE_SCHED_CLS }, + { "loop5.o", BPF_PROG_TYPE_SCHED_CLS }, /* partial unroll. 19k insn in a loop. * Total program size 20.8k insn. * ~350k processed_insns */ - "./strobemeta.o", + { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* no unroll, tiny loops */ - "./strobemeta_nounroll1.o", - "./strobemeta_nounroll2.o", - }; - const char *cg_sysctl[] = { - "./test_sysctl_loop1.o", "./test_sysctl_loop2.o", - }; - int err, i; + { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, - if (verifier_stats) - libbpf_set_print(libbpf_debug_print); + { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, + { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, - err = check_load("./loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT); - printf("test_scale:loop3:%s\n", err ? (error_cnt--, "OK") : "FAIL"); + { "test_xdp_loop.o", BPF_PROG_TYPE_XDP }, + { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL }, + }; + libbpf_print_fn_t old_print_fn = NULL; + int err, i; - for (i = 0; i < ARRAY_SIZE(sched_cls); i++) { - err = check_load(sched_cls[i], BPF_PROG_TYPE_SCHED_CLS); - printf("test_scale:%s:%s\n", sched_cls[i], err ? "FAIL" : "OK"); + if (env.verifier_stats) { + test__force_log(); + old_print_fn = libbpf_set_print(libbpf_debug_print); } - for (i = 0; i < ARRAY_SIZE(raw_tp); i++) { - err = check_load(raw_tp[i], BPF_PROG_TYPE_RAW_TRACEPOINT); - printf("test_scale:%s:%s\n", raw_tp[i], err ? 
"FAIL" : "OK"); - } + for (i = 0; i < ARRAY_SIZE(tests); i++) { + const struct scale_test_def *test = &tests[i]; + + if (!test__start_subtest(test->file)) + continue; - for (i = 0; i < ARRAY_SIZE(cg_sysctl); i++) { - err = check_load(cg_sysctl[i], BPF_PROG_TYPE_CGROUP_SYSCTL); - printf("test_scale:%s:%s\n", cg_sysctl[i], err ? "FAIL" : "OK"); + err = check_load(test->file, test->attach_type); + CHECK_FAIL(err && !test->fails); } - err = check_load("./test_xdp_loop.o", BPF_PROG_TYPE_XDP); - printf("test_scale:test_xdp_loop:%s\n", err ? "FAIL" : "OK"); - err = check_load("./test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL); - printf("test_scale:test_seg6_loop:%s\n", err ? "FAIL" : "OK"); + if (env.verifier_stats) + libbpf_set_print(old_print_fn); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c new file mode 100644 index 000000000000..7390d3061065 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/btf.h> + +static int duration = 0; + +void btf_dump_printf(void *ctx, const char *fmt, va_list args) +{ + vfprintf(ctx, fmt, args); +} + +static struct btf_dump_test_case { + const char *name; + const char *file; + struct btf_dump_opts opts; +} btf_dump_test_cases[] = { + {"btf_dump: syntax", "btf_dump_test_case_syntax", {}}, + {"btf_dump: ordering", "btf_dump_test_case_ordering", {}}, + {"btf_dump: padding", "btf_dump_test_case_padding", {}}, + {"btf_dump: packing", "btf_dump_test_case_packing", {}}, + {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}}, + {"btf_dump: multidim", "btf_dump_test_case_multidim", {}}, + {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}}, +}; + +static int btf_dump_all_types(const struct btf *btf, + const struct btf_dump_opts *opts) +{ + size_t type_cnt = btf__get_nr_types(btf); + struct btf_dump *d; + int err = 0, id; + + d = btf_dump__new(btf, 
NULL, opts, btf_dump_printf); + if (IS_ERR(d)) + return PTR_ERR(d); + + for (id = 1; id <= type_cnt; id++) { + err = btf_dump__dump_type(d, id); + if (err) + goto done; + } + +done: + btf_dump__free(d); + return err; +} + +static int test_btf_dump_case(int n, struct btf_dump_test_case *t) +{ + char test_file[256], out_file[256], diff_cmd[1024]; + struct btf *btf = NULL; + int err = 0, fd = -1; + FILE *f = NULL; + + snprintf(test_file, sizeof(test_file), "%s.o", t->file); + + btf = btf__parse_elf(test_file, NULL); + if (CHECK(IS_ERR(btf), "btf_parse_elf", + "failed to load test BTF: %ld\n", PTR_ERR(btf))) { + err = -PTR_ERR(btf); + btf = NULL; + goto done; + } + + snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file); + fd = mkstemp(out_file); + if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) { + err = fd; + goto done; + } + f = fdopen(fd, "w"); + if (CHECK(f == NULL, "open_tmp", "failed to open file: %s(%d)\n", + strerror(errno), errno)) { + close(fd); + goto done; + } + + t->opts.ctx = f; + err = btf_dump_all_types(btf, &t->opts); + fclose(f); + close(fd); + if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) { + goto done; + } + + snprintf(test_file, sizeof(test_file), "progs/%s.c", t->file); + if (access(test_file, R_OK) == -1) + /* + * When the test is run with O=, kselftest copies TEST_FILES + * without preserving the directory structure. + */ + snprintf(test_file, sizeof(test_file), "%s.c", t->file); + /* + * Diff test output and expected test output, contained between + * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case. + * For expected output lines, everything before '*' is stripped out. + * Also lines containing comment start and comment end markers are + * ignored. 
+ */ + snprintf(diff_cmd, sizeof(diff_cmd), + "awk '/START-EXPECTED-OUTPUT/{out=1;next} " + "/END-EXPECTED-OUTPUT/{out=0} " + "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */ + "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'", + test_file, out_file); + err = system(diff_cmd); + if (CHECK(err, "diff", + "differing test output, output=%s, err=%d, diff cmd:\n%s\n", + out_file, err, diff_cmd)) + goto done; + + remove(out_file); + +done: + btf__free(btf); + return err; +} + +void test_btf_dump() { + int i; + + for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) { + struct btf_dump_test_case *t = &btf_dump_test_cases[i]; + + if (!test__start_subtest(t->name)) + continue; + + test_btf_dump_case(i, &btf_dump_test_cases[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c new file mode 100644 index 000000000000..5b13f2c6c402 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "cgroup_helpers.h" + +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int prog_load(void) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); +} + +void test_cgroup_attach_autodetach(void) +{ + __u32 duration = 0, prog_cnt = 4, attach_flags; + int allow_prog[2] = {-1}; + __u32 prog_ids[2] = {0}; + void *ptr = NULL; + int cg = 0, i; + int attempts; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + allow_prog[i] = prog_load(); + if (CHECK(allow_prog[i] < 0, "prog_load", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + } + + if 
(CHECK_FAIL(setup_cgroup_environment())) + goto err; + + /* create a cgroup, attach two programs and remember their ids */ + cg = create_and_get_cgroup("/cg_autodetach"); + if (CHECK_FAIL(cg < 0)) + goto err; + + if (CHECK_FAIL(join_cgroup("/cg_autodetach"))) + goto err; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (CHECK(bpf_prog_attach(allow_prog[i], cg, + BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog_attach", "prog[%d], errno=%d\n", i, errno)) + goto err; + + /* make sure that programs are attached and run some traffic */ + if (CHECK(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags, + prog_ids, &prog_cnt), + "prog_query", "errno=%d\n", errno)) + goto err; + if (CHECK_FAIL(system(PING_CMD))) + goto err; + + /* allocate some memory (4Mb) to pin the original cgroup */ + ptr = malloc(4 * (1 << 20)); + if (CHECK_FAIL(!ptr)) + goto err; + + /* close programs and cgroup fd */ + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + close(allow_prog[i]); + allow_prog[i] = -1; + } + + close(cg); + cg = 0; + + /* leave the cgroup and remove it. don't detach programs */ + cleanup_cgroup_environment(); + + /* wait for the asynchronous auto-detachment. + * wait for no more than 5 sec and give up. 
+ */ + for (i = 0; i < ARRAY_SIZE(prog_ids); i++) { + for (attempts = 5; attempts >= 0; attempts--) { + int fd = bpf_prog_get_fd_by_id(prog_ids[i]); + + if (fd < 0) + break; + + /* don't leave the fd open */ + close(fd); + + if (CHECK_FAIL(!attempts)) + goto err; + + sleep(1); + } + } + +err: + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (allow_prog[i] >= 0) + close(allow_prog[i]); + if (cg) + close(cg); + free(ptr); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c new file mode 100644 index 000000000000..2ff21dbce179 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "cgroup_helpers.h" + +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int map_fd = -1; + +static int prog_load_cnt(int verdict, int val) +{ + int cgroup_storage_fd, percpu_cgroup_storage_fd; + + if (map_fd < 0) + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + percpu_cgroup_storage_fd = bpf_create_map( + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (percpu_cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + struct bpf_insn prog[] = { + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), 
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = val */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + + BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, val), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), + + BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int ret; + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + close(cgroup_storage_fd); + return ret; +} + +void test_cgroup_attach_multi(void) +{ + __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; + int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts); + int allow_prog[7] = {-1}; + unsigned long long value; + __u32 duration = 0; + int i = 0; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + allow_prog[i] = prog_load_cnt(1, 1 << i); + if (CHECK(allow_prog[i] < 0, "prog_load", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + } + + if (CHECK_FAIL(setup_cgroup_environment())) + goto err; + + cg1 = create_and_get_cgroup("/cg1"); + if (CHECK_FAIL(cg1 < 0)) + goto err; + cg2 = create_and_get_cgroup("/cg1/cg2"); + if (CHECK_FAIL(cg2 < 0)) + goto err; + cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); + if (CHECK_FAIL(cg3 < 0)) + goto err; + cg4 =
create_and_get_cgroup("/cg1/cg2/cg3/cg4"); + if (CHECK_FAIL(cg4 < 0)) + goto err; + cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); + if (CHECK_FAIL(cg5 < 0)) + goto err; + + if (CHECK_FAIL(join_cgroup("/cg1/cg2/cg3/cg4/cg5"))) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog0_attach_to_cg1_multi", "errno=%d\n", errno)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "fail_same_prog_attach_to_cg1", "unexpected success\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog1_attach_to_cg1_multi", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog2_attach_to_cg2_override", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog3_attach_to_cg3_multi", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog4_attach_to_cg4_override", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0), + "prog5_attach_to_cg5_none", "errno=%d\n", errno)) + goto err; + + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 1 + 2 + 8 + 32); + + /* query the number of effective progs in cg5 */ + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt)); + CHECK_FAIL(prog_cnt != 4); + /* retrieve prog_ids of effective progs in cg5 */ + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 4); + CHECK_FAIL(attach_flags != 0); + saved_prog_id = prog_ids[0]; + /* check enospc handling */ + 
prog_ids[0] = 0; + prog_cnt = 2; + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt) != -1); + CHECK_FAIL(errno != ENOSPC); + CHECK_FAIL(prog_cnt != 4); + /* check that prog_ids are returned even when buffer is too small */ + CHECK_FAIL(prog_ids[0] != saved_prog_id); + /* retrieve prog_id of single attached prog in cg5 */ + prog_ids[0] = 0; + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 1); + CHECK_FAIL(prog_ids[0] != saved_prog_id); + + /* detach bottom program and ping again */ + if (CHECK(bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS), + "prog_detach_from_cg5", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 1 + 2 + 8 + 16); + + /* test replace */ + + attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE; + attach_opts.replace_prog_fd = allow_prog[0]; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_override", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != EINVAL); + + attach_opts.flags = BPF_F_REPLACE; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_no_multi", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != EINVAL); + + attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE; + attach_opts.replace_prog_fd = -1; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_bad_fd", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != EBADF); + + /* replacing a program that is not attached to cgroup should fail */ + attach_opts.replace_prog_fd = allow_prog[3]; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, 
&attach_opts), + "fail_prog_replace_no_ent", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != ENOENT); + + /* replace 1st from the top program */ + attach_opts.replace_prog_fd = allow_prog[0]; + if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "prog_replace", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 64 + 2 + 8 + 16); + + /* detach 3rd from bottom program and ping again */ + if (CHECK(!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS), + "fail_prog_detach_from_cg3", "unexpected success\n")) + goto err; + + if (CHECK(bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS), + "prog3_detach_from_cg3", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 64 + 2 + 16); + + /* detach 2nd from bottom program and ping again */ + if (CHECK(bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS), + "prog_detach_from_cg4", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 64 + 2 + 4); + + prog_cnt = 4; + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 3); + CHECK_FAIL(attach_flags != 0); + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 0); + +err: + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (allow_prog[i] >= 0) + close(allow_prog[i]); + close(cg1); + close(cg2); + close(cg3); + close(cg4); + close(cg5); + cleanup_cgroup_environment(); +} diff --git 
a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c new file mode 100644 index 000000000000..9d8cb48b99de --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "cgroup_helpers.h" + +#define FOO "/foo" +#define BAR "/foo/bar/" +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int prog_load(int verdict) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); +} + +void test_cgroup_attach_override(void) +{ + int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1; + __u32 duration = 0; + + allow_prog = prog_load(1); + if (CHECK(allow_prog < 0, "prog_load_allow", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + + drop_prog = prog_load(0); + if (CHECK(drop_prog < 0, "prog_load_drop", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + + foo = test__join_cgroup(FOO); + if (CHECK(foo < 0, "cgroup_join_foo", "cgroup setup failed\n")) + goto err; + + if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_drop_foo_override", + "attach prog to %s failed, errno=%d\n", FOO, errno)) + goto err; + + if (CHECK(!system(PING_CMD), "ping_fail", + "ping unexpectedly succeeded\n")) + goto err; + + bar = test__join_cgroup(BAR); + if (CHECK(bar < 0, "cgroup_join_bar", "cgroup setup failed\n")) + goto err; + + if (CHECK(!system(PING_CMD), "ping_fail", + "ping unexpectedly succeeded\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + 
"prog_attach_allow_bar_override", + "attach prog to %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n")) + goto err; + + if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS), + "prog_detach_bar", + "detach prog from %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(!system(PING_CMD), "ping_fail", + "ping unexpectedly succeeded\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_allow_bar_override", + "attach prog to %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS), + "prog_detach_foo", + "detach prog from %s failed, errno=%d\n", FOO, errno)) + goto err; + + if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_allow_bar_override", + "attach prog to %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0), + "fail_prog_attach_allow_bar_none", + "attach prog to %s unexpectedly succeeded\n", BAR)) + goto err; + + if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS), + "prog_detach_bar", + "detach prog from %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS), + "fail_prog_detach_foo", + "double detach from %s unexpectedly succeeded\n", FOO)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0), + "prog_attach_allow_foo_none", + "attach prog to %s failed, errno=%d\n", FOO, errno)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0), + "fail_prog_attach_allow_bar_none", + "attach prog to %s unexpectedly succeeded\n", BAR)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + 
"fail_prog_attach_allow_bar_override", + "attach prog to %s unexpectedly succeeded\n", BAR)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "fail_prog_attach_allow_foo_override", + "attach prog to %s unexpectedly succeeded\n", FOO)) + goto err; + + if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0), + "prog_attach_drop_foo_none", + "attach prog to %s failed, errno=%d\n", FOO, errno)) + goto err; + +err: + close(foo); + close(bar); + close(allow_prog); + close(drop_prog); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c new file mode 100644 index 000000000000..b093787e9448 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <linux/version.h> +#include "test_core_extern.skel.h" + +static uint32_t get_kernel_version(void) +{ + uint32_t major, minor, patch; + struct utsname info; + + uname(&info); + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; + return KERNEL_VERSION(major, minor, patch); +} + +#define CFG "CONFIG_BPF_SYSCALL=n\n" + +static struct test_case { + const char *name; + const char *cfg; + bool fails; + struct test_core_extern__data data; +} test_cases[] = { + { .name = "default search path", .data = { .bpf_syscall = true } }, + { + .name = "custom values", + .cfg = "CONFIG_BPF_SYSCALL=n\n" + "CONFIG_TRISTATE=m\n" + "CONFIG_BOOL=y\n" + "CONFIG_CHAR=100\n" + "CONFIG_USHORT=30000\n" + "CONFIG_INT=123456\n" + "CONFIG_ULONG=0xDEADBEEFC0DE\n" + "CONFIG_STR=\"abracad\"\n" + "CONFIG_MISSING=0", + .data = { + .bpf_syscall = false, + .tristate_val = TRI_MODULE, + .bool_val = true, + .char_val = 100, + .ushort_val = 30000, + .int_val = 123456, + .ulong_val = 0xDEADBEEFC0DE, + .str_val = 
"abracad", + }, + }, + /* TRISTATE */ + { .name = "tristate (y)", .cfg = CFG"CONFIG_TRISTATE=y\n", + .data = { .tristate_val = TRI_YES } }, + { .name = "tristate (n)", .cfg = CFG"CONFIG_TRISTATE=n\n", + .data = { .tristate_val = TRI_NO } }, + { .name = "tristate (m)", .cfg = CFG"CONFIG_TRISTATE=m\n", + .data = { .tristate_val = TRI_MODULE } }, + { .name = "tristate (int)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=1" }, + { .name = "tristate (bad)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=M" }, + /* BOOL */ + { .name = "bool (y)", .cfg = CFG"CONFIG_BOOL=y\n", + .data = { .bool_val = true } }, + { .name = "bool (n)", .cfg = CFG"CONFIG_BOOL=n\n", + .data = { .bool_val = false } }, + { .name = "bool (tristate)", .fails = 1, .cfg = CFG"CONFIG_BOOL=m" }, + { .name = "bool (int)", .fails = 1, .cfg = CFG"CONFIG_BOOL=1" }, + /* CHAR */ + { .name = "char (tristate)", .cfg = CFG"CONFIG_CHAR=m\n", + .data = { .char_val = 'm' } }, + { .name = "char (bad)", .fails = 1, .cfg = CFG"CONFIG_CHAR=q\n" }, + { .name = "char (empty)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\n" }, + { .name = "char (str)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\"y\"\n" }, + /* STRING */ + { .name = "str (empty)", .cfg = CFG"CONFIG_STR=\"\"\n", + .data = { .str_val = "\0\0\0\0\0\0\0" } }, + { .name = "str (padded)", .cfg = CFG"CONFIG_STR=\"abra\"\n", + .data = { .str_val = "abra\0\0\0" } }, + { .name = "str (too long)", .cfg = CFG"CONFIG_STR=\"abracada\"\n", + .data = { .str_val = "abracad" } }, + { .name = "str (no value)", .fails = 1, .cfg = CFG"CONFIG_STR=\n" }, + { .name = "str (bad value)", .fails = 1, .cfg = CFG"CONFIG_STR=bla\n" }, + /* INTEGERS */ + { + .name = "integer forms", + .cfg = CFG + "CONFIG_CHAR=0xA\n" + "CONFIG_USHORT=0462\n" + "CONFIG_INT=-100\n" + "CONFIG_ULONG=+1000000000000", + .data = { + .char_val = 0xA, + .ushort_val = 0462, + .int_val = -100, + .ulong_val = 1000000000000, + }, + }, + { .name = "int (bad)", .fails = 1, .cfg = CFG"CONFIG_INT=abc" }, + { .name = "int (str)", .fails = 1, .cfg 
= CFG"CONFIG_INT=\"abc\"" }, + { .name = "int (empty)", .fails = 1, .cfg = CFG"CONFIG_INT=" }, + { .name = "int (mixed)", .fails = 1, .cfg = CFG"CONFIG_INT=123abc" }, + { .name = "int (max)", .cfg = CFG"CONFIG_INT=2147483647", + .data = { .int_val = 2147483647 } }, + { .name = "int (min)", .cfg = CFG"CONFIG_INT=-2147483648", + .data = { .int_val = -2147483648 } }, + { .name = "int (max+1)", .fails = 1, .cfg = CFG"CONFIG_INT=2147483648" }, + { .name = "int (min-1)", .fails = 1, .cfg = CFG"CONFIG_INT=-2147483649" }, + { .name = "ushort (max)", .cfg = CFG"CONFIG_USHORT=65535", + .data = { .ushort_val = 65535 } }, + { .name = "ushort (min)", .cfg = CFG"CONFIG_USHORT=0", + .data = { .ushort_val = 0 } }, + { .name = "ushort (max+1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=65536" }, + { .name = "ushort (min-1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=-1" }, + { .name = "u64 (max)", .cfg = CFG"CONFIG_ULONG=0xffffffffffffffff", + .data = { .ulong_val = 0xffffffffffffffff } }, + { .name = "u64 (min)", .cfg = CFG"CONFIG_ULONG=0", + .data = { .ulong_val = 0 } }, + { .name = "u64 (max+1)", .fails = 1, .cfg = CFG"CONFIG_ULONG=0x10000000000000000" }, +}; + +void test_core_extern(void) +{ + const uint32_t kern_ver = get_kernel_version(); + int err, duration = 0, i, j; + struct test_core_extern *skel = NULL; + uint64_t *got, *exp; + int n = sizeof(*skel->data) / sizeof(uint64_t); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + struct test_case *t = &test_cases[i]; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .kconfig = t->cfg, + ); + + if (!test__start_subtest(t->name)) + continue; + + skel = test_core_extern__open_opts(&opts); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + goto cleanup; + err = test_core_extern__load(skel); + if (t->fails) { + CHECK(!err, "skel_load", + "shouldn't succeed open/load of skeleton\n"); + goto cleanup; + } else if (CHECK(err, "skel_load", + "failed to open/load skeleton\n")) { + goto cleanup; + } + err = 
test_core_extern__attach(skel); + if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) + goto cleanup; + + usleep(1); + + t->data.kern_ver = kern_ver; + t->data.missing_val = 0xDEADC0DE; + got = (uint64_t *)skel->data; + exp = (uint64_t *)&t->data; + for (j = 0; j < n; j++) { + CHECK(got[j] != exp[j], "check_res", + "result #%d: expected %lx, but got %lx\n", + j, exp[j], got[j]); + } +cleanup: + test_core_extern__destroy(skel); + skel = NULL; + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c new file mode 100644 index 000000000000..31e177adbdf1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -0,0 +1,582 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "progs/core_reloc_types.h" +#include <sys/mman.h> +#include <sys/syscall.h> + +#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) + +#define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = 42, \ + .b = 0xc001, \ + .c = 0xbeef, \ +} + +#define FLAVORS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_flavors.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define FLAVORS_CASE(name) { \ + FLAVORS_CASE_COMMON(name), \ + .input = FLAVORS_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = FLAVORS_DATA(core_reloc_flavors), \ + .output_len = sizeof(struct core_reloc_flavors), \ +} + +#define FLAVORS_ERR_CASE(name) { \ + FLAVORS_CASE_COMMON(name), \ + .fails = true, \ +} + +#define NESTING_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = { .a = { .a = 42 } }, \ + .b = { .b = { .b = 0xc001 } }, \ +} + +#define NESTING_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_nesting.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define NESTING_CASE(name) { \ + NESTING_CASE_COMMON(name), \ + .input = 
NESTING_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = NESTING_DATA(core_reloc_nesting), \ + .output_len = sizeof(struct core_reloc_nesting) \ +} + +#define NESTING_ERR_CASE(name) { \ + NESTING_CASE_COMMON(name), \ + .fails = true, \ +} + +#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = { [2] = 1 }, \ + .b = { [1] = { [2] = { [3] = 2 } } }, \ + .c = { [1] = { .c = 3 } }, \ + .d = { [0] = { [0] = { .d = 4 } } }, \ +} + +#define ARRAYS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_arrays.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define ARRAYS_CASE(name) { \ + ARRAYS_CASE_COMMON(name), \ + .input = ARRAYS_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \ + .a2 = 1, \ + .b123 = 2, \ + .c1c = 3, \ + .d00d = 4, \ + .f10c = 0, \ + }, \ + .output_len = sizeof(struct core_reloc_arrays_output) \ +} + +#define ARRAYS_ERR_CASE(name) { \ + ARRAYS_CASE_COMMON(name), \ + .fails = true, \ +} + +#define PRIMITIVES_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = 1, \ + .b = 2, \ + .c = 3, \ + .d = (void *)4, \ + .f = (void *)5, \ +} + +#define PRIMITIVES_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_primitives.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define PRIMITIVES_CASE(name) { \ + PRIMITIVES_CASE_COMMON(name), \ + .input = PRIMITIVES_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = PRIMITIVES_DATA(core_reloc_primitives), \ + .output_len = sizeof(struct core_reloc_primitives), \ +} + +#define PRIMITIVES_ERR_CASE(name) { \ + PRIMITIVES_CASE_COMMON(name), \ + .fails = true, \ +} + +#define MODS_CASE(name) { \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_mods.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .input = 
STRUCT_TO_CHAR_PTR(core_reloc_##name) { \ + .a = 1, \ + .b = 2, \ + .c = (void *)3, \ + .d = (void *)4, \ + .e = { [2] = 5 }, \ + .f = { [1] = 6 }, \ + .g = { .x = 7 }, \ + .h = { .y = 8 }, \ + }, \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_mods_output) { \ + .a = 1, .b = 2, .c = 3, .d = 4, \ + .e = 5, .f = 6, .g = 7, .h = 8, \ + }, \ + .output_len = sizeof(struct core_reloc_mods_output), \ +} + +#define PTR_AS_ARR_CASE(name) { \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_ptr_as_arr.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .input = (const char *)&(struct core_reloc_##name []){ \ + { .a = 1 }, \ + { .a = 2 }, \ + { .a = 3 }, \ + }, \ + .input_len = 3 * sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_ptr_as_arr) { \ + .a = 3, \ + }, \ + .output_len = sizeof(struct core_reloc_ptr_as_arr), \ +} + +#define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .u8_field = 1, \ + .s8_field = 2, \ + .u16_field = 3, \ + .s16_field = 4, \ + .u32_field = 5, \ + .s32_field = 6, \ + .u64_field = 7, \ + .s64_field = 8, \ +} + +#define INTS_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_ints.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define INTS_CASE(name) { \ + INTS_CASE_COMMON(name), \ + .input = INTS_DATA(core_reloc_##name), \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = INTS_DATA(core_reloc_ints), \ + .output_len = sizeof(struct core_reloc_ints), \ +} + +#define INTS_ERR_CASE(name) { \ + INTS_CASE_COMMON(name), \ + .fails = true, \ +} + +#define EXISTENCE_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_existence.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .relaxed_core_relocs = true + +#define EXISTENCE_ERR_CASE(name) { \ + EXISTENCE_CASE_COMMON(name), \ + .fails = true, \ +} + +#define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \ + 
.case_name = test_name_prefix#name, \ + .bpf_obj_file = objfile, \ + .btf_src_file = "btf__core_reloc_" #name ".o" + +#define BITFIELDS_CASE(name, ...) { \ + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ + "direct:", name), \ + .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_bitfields_output), \ +}, { \ + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ + "probed:", name), \ + .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \ + .input_len = sizeof(struct core_reloc_##name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_bitfields_output), \ + .direct_raw_tp = true, \ +} + + +#define BITFIELDS_ERR_CASE(name) { \ + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ + "probed:", name), \ + .fails = true, \ +}, { \ + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ + "direct:", name), \ + .direct_raw_tp = true, \ + .fails = true, \ +} + +#define SIZE_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_size.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .relaxed_core_relocs = true + +#define SIZE_OUTPUT_DATA(type) \ + STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ + .int_sz = sizeof(((type *)0)->int_field), \ + .struct_sz = sizeof(((type *)0)->struct_field), \ + .union_sz = sizeof(((type *)0)->union_field), \ + .arr_sz = sizeof(((type *)0)->arr_field), \ + .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \ + .ptr_sz = sizeof(((type *)0)->ptr_field), \ + .enum_sz = sizeof(((type *)0)->enum_field), \ + } + +#define SIZE_CASE(name) { \ + SIZE_CASE_COMMON(name), \ + .input_len = 0, \ + .output = SIZE_OUTPUT_DATA(struct core_reloc_##name), \ + .output_len = sizeof(struct core_reloc_size_output), \ +} + +#define 
SIZE_ERR_CASE(name) { \ + SIZE_CASE_COMMON(name), \ + .fails = true, \ +} + +struct core_reloc_test_case { + const char *case_name; + const char *bpf_obj_file; + const char *btf_src_file; + const char *input; + int input_len; + const char *output; + int output_len; + bool fails; + bool relaxed_core_relocs; + bool direct_raw_tp; +}; + +static struct core_reloc_test_case test_cases[] = { + /* validate we can find kernel image and use its BTF for relocs */ + { + .case_name = "kernel", + .bpf_obj_file = "test_core_reloc_kernel.o", + .btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */ + .input = "", + .input_len = 0, + .output = STRUCT_TO_CHAR_PTR(core_reloc_kernel_output) { + .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + .comm = "test_progs", + .comm_len = sizeof("test_progs"), + }, + .output_len = sizeof(struct core_reloc_kernel_output), + }, + + /* validate BPF program can use multiple flavors to match against + * single target BTF type + */ + FLAVORS_CASE(flavors), + + FLAVORS_ERR_CASE(flavors__err_wrong_name), + + /* various struct/enum nesting and resolution scenarios */ + NESTING_CASE(nesting), + NESTING_CASE(nesting___anon_embed), + NESTING_CASE(nesting___struct_union_mixup), + NESTING_CASE(nesting___extra_nesting), + NESTING_CASE(nesting___dup_compat_types), + + NESTING_ERR_CASE(nesting___err_missing_field), + NESTING_ERR_CASE(nesting___err_array_field), + NESTING_ERR_CASE(nesting___err_missing_container), + NESTING_ERR_CASE(nesting___err_nonstruct_container), + NESTING_ERR_CASE(nesting___err_array_container), + NESTING_ERR_CASE(nesting___err_dup_incompat_types), + NESTING_ERR_CASE(nesting___err_partial_match_dups), + NESTING_ERR_CASE(nesting___err_too_deep), + + /* various array access relocation scenarios */ + ARRAYS_CASE(arrays), + ARRAYS_CASE(arrays___diff_arr_dim), + ARRAYS_CASE(arrays___diff_arr_val_sz), + ARRAYS_CASE(arrays___equiv_zero_sz_arr), + ARRAYS_CASE(arrays___fixed_arr), + + ARRAYS_ERR_CASE(arrays___err_too_small), + 
ARRAYS_ERR_CASE(arrays___err_too_shallow), + ARRAYS_ERR_CASE(arrays___err_non_array), + ARRAYS_ERR_CASE(arrays___err_wrong_val_type1), + ARRAYS_ERR_CASE(arrays___err_wrong_val_type2), + ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr), + + /* enum/ptr/int handling scenarios */ + PRIMITIVES_CASE(primitives), + PRIMITIVES_CASE(primitives___diff_enum_def), + PRIMITIVES_CASE(primitives___diff_func_proto), + PRIMITIVES_CASE(primitives___diff_ptr_type), + + PRIMITIVES_ERR_CASE(primitives___err_non_enum), + PRIMITIVES_ERR_CASE(primitives___err_non_int), + PRIMITIVES_ERR_CASE(primitives___err_non_ptr), + + /* const/volatile/restrict and typedefs scenarios */ + MODS_CASE(mods), + MODS_CASE(mods___mod_swap), + MODS_CASE(mods___typedefs), + + /* handling "ptr is an array" semantics */ + PTR_AS_ARR_CASE(ptr_as_arr), + PTR_AS_ARR_CASE(ptr_as_arr___diff_sz), + + /* int signedness/sizing/bitfield handling */ + INTS_CASE(ints), + INTS_CASE(ints___bool), + INTS_CASE(ints___reverse_sign), + + /* validate edge cases of capturing relocations */ + { + .case_name = "misc", + .bpf_obj_file = "test_core_reloc_misc.o", + .btf_src_file = "btf__core_reloc_misc.o", + .input = (const char *)&(struct core_reloc_misc_extensible[]){ + { .a = 1 }, + { .a = 2 }, /* not read */ + { .a = 3 }, + }, + .input_len = 4 * sizeof(int), + .output = STRUCT_TO_CHAR_PTR(core_reloc_misc_output) { + .a = 1, + .b = 1, + .c = 0, /* BUG in clang, should be 3 */ + }, + .output_len = sizeof(struct core_reloc_misc_output), + }, + + /* validate field existence checks */ + { + EXISTENCE_CASE_COMMON(existence), + .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) { + .a = 1, + .b = 2, + .c = 3, + .arr = { 4 }, + .s = { .x = 5 }, + }, + .input_len = sizeof(struct core_reloc_existence), + .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) { + .a_exists = 1, + .b_exists = 1, + .c_exists = 1, + .arr_exists = 1, + .s_exists = 1, + .a_value = 1, + .b_value = 2, + .c_value = 3, + .arr_value = 4, + .s_value = 5, + }, + 
.output_len = sizeof(struct core_reloc_existence_output), + }, + { + EXISTENCE_CASE_COMMON(existence___minimal), + .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { + .a = 42, + }, + .input_len = sizeof(struct core_reloc_existence), + .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) { + .a_exists = 1, + .b_exists = 0, + .c_exists = 0, + .arr_exists = 0, + .s_exists = 0, + .a_value = 42, + .b_value = 0xff000002u, + .c_value = 0xff000003u, + .arr_value = 0xff000004u, + .s_value = 0xff000005u, + }, + .output_len = sizeof(struct core_reloc_existence_output), + }, + + EXISTENCE_ERR_CASE(existence__err_int_sz), + EXISTENCE_ERR_CASE(existence__err_int_type), + EXISTENCE_ERR_CASE(existence__err_int_kind), + EXISTENCE_ERR_CASE(existence__err_arr_kind), + EXISTENCE_ERR_CASE(existence__err_arr_value_type), + EXISTENCE_ERR_CASE(existence__err_struct_type), + + /* bitfield relocation checks */ + BITFIELDS_CASE(bitfields, { + .ub1 = 1, + .ub2 = 2, + .ub7 = 96, + .sb4 = -7, + .sb20 = -0x76543, + .u32 = 0x80000000, + .s32 = -0x76543210, + }), + BITFIELDS_CASE(bitfields___bit_sz_change, { + .ub1 = 6, + .ub2 = 0xABCDE, + .ub7 = 1, + .sb4 = -1, + .sb20 = -0x17654321, + .u32 = 0xBEEF, + .s32 = -0x3FEDCBA987654321, + }), + BITFIELDS_CASE(bitfields___bitfield_vs_int, { + .ub1 = 0xFEDCBA9876543210, + .ub2 = 0xA6, + .ub7 = -0x7EDCBA987654321, + .sb4 = -0x6123456789ABCDE, + .sb20 = 0xD00D, + .u32 = -0x76543, + .s32 = 0x0ADEADBEEFBADB0B, + }), + BITFIELDS_CASE(bitfields___just_big_enough, { + .ub1 = 0xF, + .ub2 = 0x0812345678FEDCBA, + }), + BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield), + + /* size relocation checks */ + SIZE_CASE(size), + SIZE_CASE(size___diff_sz), +}; + +struct data { + char in[256]; + char out[256]; + uint64_t my_pid_tgid; +}; + +static size_t roundup_page(size_t sz) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + return (sz + page_size - 1) / page_size * page_size; +} + +void test_core_reloc(void) +{ + const size_t mmap_sz = 
roundup_page(sizeof(struct data)); + struct bpf_object_load_attr load_attr = {}; + struct core_reloc_test_case *test_case; + const char *tp_name, *probe_name; + int err, duration = 0, i, equal; + struct bpf_link *link = NULL; + struct bpf_map *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + uint64_t my_pid_tgid; + struct data *data; + void *mmap_data = NULL; + + my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + test_case = &test_cases[i]; + if (!test__start_subtest(test_case->case_name)) + continue; + + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .relaxed_core_relocs = test_case->relaxed_core_relocs, + ); + + obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); + if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", + test_case->bpf_obj_file, PTR_ERR(obj))) + continue; + + /* for typed raw tracepoints, NULL should be specified */ + if (test_case->direct_raw_tp) { + probe_name = "tp_btf/sys_enter"; + tp_name = NULL; + } else { + probe_name = "raw_tracepoint/sys_enter"; + tp_name = "sys_enter"; + } + + prog = bpf_object__find_program_by_title(obj, probe_name); + if (CHECK(!prog, "find_probe", + "prog '%s' not found\n", probe_name)) + goto cleanup; + + load_attr.obj = obj; + load_attr.log_level = 0; + load_attr.target_btf_path = test_case->btf_src_file; + err = bpf_object__load_xattr(&load_attr); + if (test_case->fails) { + CHECK(!err, "obj_load_fail", + "should fail to load prog '%s'\n", probe_name); + goto cleanup; + } else { + if (CHECK(err, "obj_load", + "failed to load prog '%s': %d\n", + probe_name, err)) + goto cleanup; + } + + data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto cleanup; + + mmap_data = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_SHARED, bpf_map__fd(data_map), 0); + if (CHECK(mmap_data == MAP_FAILED, "mmap", + ".bss mmap failed: %d", errno)) { + 
mmap_data = NULL; + goto cleanup; + } + data = mmap_data; + + memset(mmap_data, 0, sizeof(*data)); + memcpy(data->in, test_case->input, test_case->input_len); + data->my_pid_tgid = my_pid_tgid; + + link = bpf_program__attach_raw_tracepoint(prog, tp_name); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", + PTR_ERR(link))) + goto cleanup; + + /* trigger test run */ + usleep(1); + + equal = memcmp(data->out, test_case->output, + test_case->output_len) == 0; + if (CHECK(!equal, "check_result", + "input/output data don't match\n")) { + int j; + + for (j = 0; j < test_case->input_len; j++) { + printf("input byte #%d: 0x%02hhx\n", + j, test_case->input[j]); + } + for (j = 0; j < test_case->output_len; j++) { + printf("output byte #%d: EXP 0x%02hhx GOT 0x%02hhx\n", + j, test_case->output[j], data->out[j]); + } + goto cleanup; + } + +cleanup: + if (mmap_data) { + CHECK_FAIL(munmap(mmap_data, mmap_sz)); + mmap_data = NULL; + } + if (!IS_ERR_OR_NULL(link)) { + bpf_link__destroy(link); + link = NULL; + } + bpf_object__close(obj); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/cpu_mask.c b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c new file mode 100644 index 000000000000..f7c7e25232be --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/btf.h> +#include "bpf/libbpf_internal.h" + +static int duration = 0; + +static void validate_mask(int case_nr, const char *exp, bool *mask, int n) +{ + int i; + + for (i = 0; exp[i]; i++) { + if (exp[i] == '1') { + if (CHECK(i + 1 > n, "mask_short", + "case #%d: mask too short, got n=%d, need at least %d\n", + case_nr, n, i + 1)) + return; + CHECK(!mask[i], "cpu_not_set", + "case #%d: mask differs, expected cpu#%d SET\n", + case_nr, i); + } else { + CHECK(i < n && mask[i], "cpu_set", + "case #%d: mask differs, expected cpu#%d UNSET\n", + case_nr, i); + } + } + CHECK(i < n, "mask_long", + "case #%d: mask too 
long, got n=%d, expected at most %d\n", + case_nr, n, i); +} + +static struct { + const char *cpu_mask; + const char *expect; + bool fails; +} test_cases[] = { + { "0\n", "1", false }, + { "0,2\n", "101", false }, + { "0-2\n", "111", false }, + { "0-2,3-4\n", "11111", false }, + { "0", "1", false }, + { "0-2", "111", false }, + { "0,2", "101", false }, + { "0,1-3", "1111", false }, + { "0,1,2,3", "1111", false }, + { "0,2-3,5", "101101", false }, + { "3-3", "0001", false }, + { "2-4,6,9-10", "00111010011", false }, + /* failure cases */ + { "", "", true }, + { "0-", "", true }, + { "0 ", "", true }, + { "0_1", "", true }, + { "1-0", "", true }, + { "-1", "", true }, +}; + +void test_cpu_mask() +{ + int i, err, n; + bool *mask; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + mask = NULL; + err = parse_cpu_mask_str(test_cases[i].cpu_mask, &mask, &n); + if (test_cases[i].fails) { + CHECK(!err, "should_fail", + "case #%d: parsing should fail!\n", i + 1); + } else { + if (CHECK(err, "parse_err", + "case #%d: cpu mask parsing failed: %d\n", + i + 1, err)) + continue; + validate_mask(i + 1, test_cases[i].expect, mask, n); + } + free(mask); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c new file mode 100644 index 000000000000..235ac4f67f5b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> +#include "test_pkt_access.skel.h" +#include "fentry_test.skel.h" +#include "fexit_test.skel.h" + +void test_fentry_fexit(void) +{ + struct test_pkt_access *pkt_skel = NULL; + struct fentry_test *fentry_skel = NULL; + struct fexit_test *fexit_skel = NULL; + __u64 *fentry_res, *fexit_res; + __u32 duration = 0, retval; + int err, pkt_fd, i; + + pkt_skel = test_pkt_access__open_and_load(); + if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) + 
return; + fentry_skel = fentry_test__open_and_load(); + if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) + goto close_prog; + fexit_skel = fexit_test__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto close_prog; + + err = fentry_test__attach(fentry_skel); + if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) + goto close_prog; + err = fexit_test__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto close_prog; + + pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + fentry_res = (__u64 *)fentry_skel->bss; + fexit_res = (__u64 *)fexit_skel->bss; + printf("%lld\n", fentry_skel->bss->test1_result); + for (i = 0; i < 6; i++) { + CHECK(fentry_res[i] != 1, "result", + "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); + CHECK(fexit_res[i] != 1, "result", + "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]); + } + +close_prog: + test_pkt_access__destroy(pkt_skel); + fentry_test__destroy(fentry_skel); + fexit_test__destroy(fexit_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c new file mode 100644 index 000000000000..5cc06021f27d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> +#include "test_pkt_access.skel.h" +#include "fentry_test.skel.h" + +void test_fentry_test(void) +{ + struct test_pkt_access *pkt_skel = NULL; + struct fentry_test *fentry_skel = NULL; + int err, pkt_fd, i; + __u32 duration = 0, retval; + __u64 *result; + + pkt_skel = test_pkt_access__open_and_load(); + if (CHECK(!pkt_skel, 
"pkt_skel_load", "pkt_access skeleton failed\n")) + return; + fentry_skel = fentry_test__open_and_load(); + if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) + goto cleanup; + + err = fentry_test__attach(fentry_skel); + if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) + goto cleanup; + + pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + result = (__u64 *)fentry_skel->bss; + for (i = 0; i < 6; i++) { + if (CHECK(result[i] != 1, "result", + "fentry_test%d failed err %lld\n", i + 1, result[i])) + goto cleanup; + } + +cleanup: + fentry_test__destroy(fentry_skel); + test_pkt_access__destroy(pkt_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c new file mode 100644 index 000000000000..cde463af7071 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +static void test_fexit_bpf2bpf_common(const char *obj_file, + const char *target_obj_file, + int prog_cnt, + const char **prog_name) +{ + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd, i; + struct bpf_link **link = NULL; + struct bpf_program **prog = NULL; + __u32 duration = 0, retval; + struct bpf_map *data_map; + const int zero = 0; + u64 *result = NULL; + + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); + + link = calloc(sizeof(struct bpf_link *), prog_cnt); + prog = calloc(sizeof(struct bpf_program *), prog_cnt); + result = 
malloc((prog_cnt + 32 /* spare */) * sizeof(u64)); + if (CHECK(!link || !prog || !result, "alloc_memory", + "failed to alloc memory")) + goto close_prog; + + obj = bpf_object__open_file(obj_file, &opts); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open fexit_bpf2bpf: %ld\n", + PTR_ERR(obj))) + goto close_prog; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto close_prog; + + for (i = 0; i < prog_cnt; i++) { + prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i])) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + for (i = 0; i < prog_cnt; i++) + if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n", + result[i])) + goto close_prog; + +close_prog: + for (i = 0; i < prog_cnt; i++) + if (!IS_ERR_OR_NULL(link[i])) + bpf_link__destroy(link[i]); + if (!IS_ERR_OR_NULL(obj)) + bpf_object__close(obj); + bpf_object__close(pkt_obj); + free(link); + free(prog); + free(result); +} + +static void test_target_no_callees(void) +{ + const char *prog_name[] = { + "fexit/test_pkt_md_access", + }; + test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o", + "./test_pkt_md_access.o", + ARRAY_SIZE(prog_name), + prog_name); +} + +static void test_target_yes_callees(void) +{ + const char *prog_name[] = { + "fexit/test_pkt_access", 
+ "fexit/test_pkt_access_subprog1", + "fexit/test_pkt_access_subprog2", + "fexit/test_pkt_access_subprog3", + }; + test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", + "./test_pkt_access.o", + ARRAY_SIZE(prog_name), + prog_name); +} + +static void test_func_replace(void) +{ + const char *prog_name[] = { + "fexit/test_pkt_access", + "fexit/test_pkt_access_subprog1", + "fexit/test_pkt_access_subprog2", + "fexit/test_pkt_access_subprog3", + "freplace/get_skb_len", + "freplace/get_skb_ifindex", + "freplace/get_constant", + }; + test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", + "./test_pkt_access.o", + ARRAY_SIZE(prog_name), + prog_name); +} + +void test_fexit_bpf2bpf(void) +{ + test_target_no_callees(); + test_target_yes_callees(); + test_func_replace(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c new file mode 100644 index 000000000000..3b9dbf7433f0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +/* x86-64 fits 55 JITed and 43 interpreted progs into half page */ +#define CNT 40 + +void test_fexit_stress(void) +{ + char test_skb[128] = {}; + int fexit_fd[CNT] = {}; + int link_fd[CNT] = {}; + __u32 duration = 0; + char error[4096]; + __u32 prog_ret; + int err, i, filter_fd; + + const struct bpf_insn trace_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr load_attr = { + .prog_type = BPF_PROG_TYPE_TRACING, + .license = "GPL", + .insns = trace_program, + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), + .expected_attach_type = BPF_TRACE_FEXIT, + }; + + const struct bpf_insn skb_program[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + + struct bpf_load_program_attr skb_load_attr = { + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .license = "GPL", + .insns = skb_program, + 
.insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), + }; + + err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", + load_attr.expected_attach_type); + if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) + goto out; + load_attr.attach_btf_id = err; + + for (i = 0; i < CNT; i++) { + fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error)); + if (CHECK(fexit_fd[i] < 0, "fexit loaded", + "failed: %d errno %d\n", fexit_fd[i], errno)) + goto out; + link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]); + if (CHECK(link_fd[i] < 0, "fexit attach failed", + "prog %d failed: %d err %d\n", i, link_fd[i], errno)) + goto out; + } + + filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", + filter_fd, errno)) + goto out; + + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, + 0, &prog_ret, 0); + close(filter_fd); + CHECK_FAIL(err); +out: + for (i = 0; i < CNT; i++) { + if (link_fd[i]) + close(link_fd[i]); + if (fexit_fd[i]) + close(fexit_fd[i]); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c new file mode 100644 index 000000000000..d2c3655dd7a3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> + +void test_fexit_test(void) +{ + struct bpf_prog_load_attr attr = { + .file = "./fexit_test.o", + }; + + char prog_name[] = "fexit/bpf_fentry_testX"; + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd, kfree_skb_fd, i; + struct bpf_link *link[6] = {}; + struct bpf_program *prog[6]; + __u32 duration = 0, retval; + struct bpf_map *data_map; + const int zero = 0; + u64 result[6]; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &pkt_obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d 
errno %d\n", err, errno)) + return; + err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + goto close_prog; + + for (i = 0; i < 6; i++) { + prog_name[sizeof(prog_name) - 2] = '1' + i; + prog[i] = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) + goto close_prog; + link[i] = bpf_program__attach_trace(prog[i]); + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) + goto close_prog; + } + data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + for (i = 0; i < 6; i++) + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", + i + 1, result[i])) + goto close_prog; + +close_prog: + for (i = 0; i < 6; i++) + if (!IS_ERR_OR_NULL(link[i])) + bpf_link__destroy(link[i]); + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index c938283ac232..92563898867c 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -5,6 +5,10 @@ #include <linux/if_tun.h> #include <sys/uio.h> +#ifndef IP_MF +#define IP_MF 0x2000 +#endif + #define CHECK_FLOW_KEYS(desc, got, expected) \ CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ desc, \ @@ -16,6 +20,7 @@ "is_encap=%u/%u " \ "ip_proto=0x%x/0x%x " \ "n_proto=0x%x/0x%x " \ + "flow_label=0x%x/0x%x " \ 
"sport=%u/%u " \ "dport=%u/%u\n", \ got.nhoff, expected.nhoff, \ @@ -26,6 +31,7 @@ got.is_encap, expected.is_encap, \ got.ip_proto, expected.ip_proto, \ got.n_proto, expected.n_proto, \ + got.flow_label, expected.flow_label, \ got.sport, expected.sport, \ got.dport, expected.dport) @@ -35,6 +41,13 @@ struct ipv4_pkt { struct tcphdr tcp; } __packed; +struct ipip_pkt { + struct ethhdr eth; + struct iphdr iph; + struct iphdr iph_inner; + struct tcphdr tcp; +} __packed; + struct svlan_ipv4_pkt { struct ethhdr eth; __u16 vlan_tci; @@ -49,6 +62,18 @@ struct ipv6_pkt { struct tcphdr tcp; } __packed; +struct ipv6_frag_pkt { + struct ethhdr eth; + struct ipv6hdr iph; + struct frag_hdr { + __u8 nexthdr; + __u8 reserved; + __be16 frag_off; + __be32 identification; + } ipf; + struct tcphdr tcp; +} __packed; + struct dvlan_ipv6_pkt { struct ethhdr eth; __u16 vlan_tci; @@ -64,10 +89,13 @@ struct test { union { struct ipv4_pkt ipv4; struct svlan_ipv4_pkt svlan_ipv4; + struct ipip_pkt ipip; struct ipv6_pkt ipv6; + struct ipv6_frag_pkt ipv6_frag; struct dvlan_ipv6_pkt dvlan_ipv6; } pkt; struct bpf_flow_keys keys; + __u32 flags; }; #define VLAN_HLEN 4 @@ -81,6 +109,8 @@ struct test tests[] = { .iph.protocol = IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, @@ -88,6 +118,8 @@ struct test tests[] = { .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), + .sport = 80, + .dport = 8080, }, }, { @@ -97,6 +129,8 @@ struct test tests[] = { .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN, @@ -104,6 +138,8 @@ struct test tests[] = { .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, }, }, { @@ -115,6 +151,8 @@ struct test tests[] = { .iph.protocol = 
IPPROTO_TCP, .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN + VLAN_HLEN, @@ -122,6 +160,8 @@ struct test tests[] = { .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), + .sport = 80, + .dport = 8080, }, }, { @@ -133,6 +173,8 @@ struct test tests[] = { .iph.nexthdr = IPPROTO_TCP, .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, }, .keys = { .nhoff = ETH_HLEN + VLAN_HLEN * 2, @@ -141,8 +183,205 @@ struct test tests[] = { .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, + }, + }, + { + .name = "ipv4-frag", + .pkt.ipv4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.frag_off = __bpf_constant_htons(IP_MF), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_frag = true, + .is_first_frag = true, + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + }, + { + .name = "ipv4-no-frag", + .pkt.ipv4 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_TCP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.frag_off = __bpf_constant_htons(IP_MF), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_frag = true, + .is_first_frag = true, + }, + }, + { + .name = "ipv6-frag", + .pkt.ipv6_frag = { + 
.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_FRAGMENT, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .ipf.nexthdr = IPPROTO_TCP, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr) + + sizeof(struct frag_hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .is_frag = true, + .is_first_frag = true, + .sport = 80, + .dport = 8080, + }, + .flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG, + }, + { + .name = "ipv6-no-frag", + .pkt.ipv6_frag = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_FRAGMENT, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .ipf.nexthdr = IPPROTO_TCP, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr) + + sizeof(struct frag_hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .is_frag = true, + .is_first_frag = true, + }, + }, + { + .name = "ipv6-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0xb, 0xee, 0xef }, + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .sport = 80, + .dport = 8080, + .flow_label = __bpf_constant_htonl(0xbeeef), }, }, + { + .name = "ipv6-no-flow-label", + .pkt.ipv6 = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6), + .iph.nexthdr = IPPROTO_TCP, + .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES), + .iph.flow_lbl = { 0xb, 0xee, 0xef }, + .tcp.doff = 5, + .tcp.source = 80, + 
.tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), + .addr_proto = ETH_P_IPV6, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IPV6), + .flow_label = __bpf_constant_htonl(0xbeeef), + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL, + }, + { + .name = "ipip-encap", + .pkt.ipip = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_IPIP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES) - + sizeof(struct iphdr), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr) + + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_TCP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + .sport = 80, + .dport = 8080, + }, + }, + { + .name = "ipip-no-encap", + .pkt.ipip = { + .eth.h_proto = __bpf_constant_htons(ETH_P_IP), + .iph.ihl = 5, + .iph.protocol = IPPROTO_IPIP, + .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES), + .iph_inner.ihl = 5, + .iph_inner.protocol = IPPROTO_TCP, + .iph_inner.tot_len = + __bpf_constant_htons(MAGIC_BYTES) - + sizeof(struct iphdr), + .tcp.doff = 5, + .tcp.source = 80, + .tcp.dest = 8080, + }, + .keys = { + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), + .addr_proto = ETH_P_IP, + .ip_proto = IPPROTO_IPIP, + .n_proto = __bpf_constant_htons(ETH_P_IP), + .is_encap = true, + }, + .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP, + }, }; static int create_tap(const char *ifname) @@ -212,10 +451,8 @@ void test_flow_dissector(void) err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", "jmp_table", "last_dissection", &prog_fd, &keys_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - 
} for (i = 0; i < ARRAY_SIZE(tests); i++) { struct bpf_flow_keys flow_keys; @@ -225,6 +462,13 @@ void test_flow_dissector(void) .data_size_in = sizeof(tests[i].pkt), .data_out = &flow_keys, }; + static struct bpf_flow_keys ctx = {}; + + if (tests[i].flags) { + tattr.ctx_in = &ctx; + tattr.ctx_size_in = sizeof(ctx); + ctx.flags = tests[i].flags; + } err = bpf_prog_test_run_xattr(&tattr); CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) || @@ -251,9 +495,20 @@ void test_flow_dissector(void) CHECK(err, "ifup", "err %d errno %d\n", err, errno); for (i = 0; i < ARRAY_SIZE(tests); i++) { - struct bpf_flow_keys flow_keys = {}; + /* Keep in sync with 'flags' from eth_get_headlen. */ + __u32 eth_get_headlen_flags = + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG; struct bpf_prog_test_run_attr tattr = {}; - __u32 key = 0; + struct bpf_flow_keys flow_keys = {}; + __u32 key = (__u32)(tests[i].keys.sport) << 16 | + tests[i].keys.dport; + + /* For skb-less case we can't pass input flags; run + * only the tests that have a matching set of flags. 
+ */ + + if (tests[i].flags != eth_get_headlen_flags) + continue; err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno); @@ -263,6 +518,9 @@ void test_flow_dissector(void) CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + + err = bpf_map_delete_elem(keys_fd, &key); + CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err); } bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c new file mode 100644 index 000000000000..1f51ba66b98b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test that the flow_dissector program can be updated with a single + * syscall by attaching a new program that replaces the existing one. + * + * Corner case - the same program cannot be attached twice. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdbool.h> +#include <unistd.h> + +#include <linux/bpf.h> +#include <bpf/bpf.h> + +#include "test_progs.h" + +static bool is_attached(int netns) +{ + __u32 cnt; + int err; + + err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt); + if (CHECK_FAIL(err)) { + perror("bpf_prog_query"); + return true; /* fail-safe */ + } + + return cnt > 0; +} + +static int load_prog(void) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, BPF_OK), + BPF_EXIT_INSN(), + }; + int fd; + + fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + if (CHECK_FAIL(fd < 0)) + perror("bpf_load_program"); + + return fd; +} + +static void do_flow_dissector_reattach(void) +{ + int prog_fd[2] = { -1, -1 }; + int err; + + prog_fd[0] = load_prog(); + if (prog_fd[0] < 0) + return; + + prog_fd[1] = load_prog(); + if (prog_fd[1] < 0) + goto out_close; + + err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach-0"); + goto out_close; + } + + /* Expect success when attaching a different program */ + err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(err)) { + perror("bpf_prog_attach-1"); + goto out_detach; + } + + /* Expect failure when attaching the same program twice */ + err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0); + if (CHECK_FAIL(!err || errno != EINVAL)) + perror("bpf_prog_attach-2"); + +out_detach: + err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR); + if (CHECK_FAIL(err)) + perror("bpf_prog_detach"); + +out_close: + close(prog_fd[1]); + close(prog_fd[0]); +} + +void test_flow_dissector_reattach(void) +{ + int init_net, self_net, err; + + self_net = open("/proc/self/ns/net", O_RDONLY); + if (CHECK_FAIL(self_net < 0)) { + perror("open(/proc/self/ns/net"); + return; + } + + init_net = open("/proc/1/ns/net", O_RDONLY); + if (CHECK_FAIL(init_net < 
0)) { + perror("open(/proc/1/ns/net)"); + goto out_close; + } + + err = setns(init_net, CLONE_NEWNET); + if (CHECK_FAIL(err)) { + perror("setns(/proc/1/ns/net)"); + goto out_close; + } + + if (is_attached(init_net)) { + test__skip(); + printf("Can't test with flow dissector attached to init_net\n"); + goto out_setns; + } + + /* First run tests in root network namespace */ + do_flow_dissector_reattach(); + + /* Then repeat tests in a non-root namespace */ + err = unshare(CLONE_NEWNET); + if (CHECK_FAIL(err)) { + perror("unshare(CLONE_NEWNET)"); + goto out_setns; + } + do_flow_dissector_reattach(); + +out_setns: + /* Move back to netns we started in. */ + err = setns(self_net, CLONE_NEWNET); + if (CHECK_FAIL(err)) + perror("setns(/proc/self/ns/net)"); + +out_close: + close(init_net); + close(self_net); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index c2a0a9d5591b..eba9a970703b 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -1,8 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <pthread.h> +#include <sched.h> +#include <sys/socket.h> #include <test_progs.h> #define MAX_CNT_RAWTP 10ull #define MAX_STACK_RAWTP 100 + +static int duration = 0; + struct get_stack_trace_t { int pid; int kern_stack_size; @@ -13,7 +20,7 @@ struct get_stack_trace_t { struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; }; -static int get_stack_print_output(void *data, int size) +static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) { bool good_kern_stack = false, good_user_stack = false; const char *nonjit_func = "___bpf_prog_run"; @@ -34,7 +41,7 @@ static int get_stack_print_output(void *data, int size) * just assume it is good if the stack is not empty. * This could be improved in the future. 
*/ - if (jit_enabled) { + if (env.jit_enabled) { found = num_stack > 0; } else { for (i = 0; i < num_stack; i++) { @@ -51,7 +58,7 @@ static int get_stack_print_output(void *data, int size) } } else { num_stack = e->kern_stack_size / sizeof(__u64); - if (jit_enabled) { + if (env.jit_enabled) { good_kern_stack = num_stack > 0; } else { for (i = 0; i < num_stack; i++) { @@ -65,75 +72,73 @@ static int get_stack_print_output(void *data, int size) if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0) good_user_stack = true; } - if (!good_kern_stack || !good_user_stack) - return LIBBPF_PERF_EVENT_ERROR; - if (cnt == MAX_CNT_RAWTP) - return LIBBPF_PERF_EVENT_DONE; - - return LIBBPF_PERF_EVENT_CONT; + if (!good_kern_stack) + CHECK(!good_kern_stack, "kern_stack", "corrupted kernel stack\n"); + if (!good_user_stack) + CHECK(!good_user_stack, "user_stack", "corrupted user stack\n"); } void test_get_stack_raw_tp(void) { const char *file = "./test_get_stack_rawtp.o"; - int i, efd, err, prog_fd, pmu_fd, perfmap_fd; - struct perf_event_attr attr = {}; + const char *prog_name = "raw_tracepoint/sys_enter"; + int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP; + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb = NULL; + struct bpf_link *link = NULL; struct timespec tv = {0, 10}; - __u32 key = 0, duration = 0; + struct bpf_program *prog; struct bpf_object *obj; + struct bpf_map *map; + cpu_set_t cpu_set; err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd); if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) return; - efd = bpf_raw_tracepoint_open("sys_enter", prog_fd); - if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno)) + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) goto close_prog; - perfmap_fd = bpf_find_map(__func__, obj, "perfmap"); - if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n", - perfmap_fd, errno)) + map = 
bpf_object__find_map_by_name(obj, "perfmap"); + if (CHECK(!map, "bpf_find_map", "not found\n")) goto close_prog; err = load_kallsyms(); if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno)) goto close_prog; - attr.sample_type = PERF_SAMPLE_RAW; - attr.type = PERF_TYPE_SOFTWARE; - attr.config = PERF_COUNT_SW_BPF_OUTPUT; - pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/, - -1/*group_fd*/, 0); - if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, - errno)) + CPU_ZERO(&cpu_set); + CPU_SET(0, &cpu_set); + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno)) goto close_prog; - err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY); - if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err, - errno)) + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) goto close_prog; - err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n", - err, errno)) - goto close_prog; - - err = perf_event_mmap(pmu_fd); - if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno)) + pb_opts.sample_cb = get_stack_print_output; + pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto close_prog; /* trigger some syscall action */ for (i = 0; i < MAX_CNT_RAWTP; i++) nanosleep(&tv, NULL); - err = perf_event_poller(pmu_fd, get_stack_print_output); - if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno)) - goto close_prog; + while (exp_cnt > 0) { + err = perf_buffer__poll(pb, 100); + if (err < 0 && CHECK(err < 0, "pb__poll", "err %d\n", err)) + goto close_prog; + exp_cnt -= err; + } - goto close_prog_noerr; close_prog: - error_cnt++; -close_prog_noerr: + if (!IS_ERR_OR_NULL(link)) + 
bpf_link__destroy(link); + if (!IS_ERR_OR_NULL(pb)) + perf_buffer__free(pb); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c index d011079fb0bf..c680926fce73 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data.c @@ -7,10 +7,8 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration) uint64_t num; map_fd = bpf_find_map(__func__, obj, "result_number"); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } struct { char *name; @@ -44,10 +42,8 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration) char str[32]; map_fd = bpf_find_map(__func__, obj, "result_string"); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } struct { char *name; @@ -81,10 +77,8 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration) struct foo val; map_fd = bpf_find_map(__func__, obj, "result_struct"); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } struct { char *name; @@ -112,16 +106,12 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration) __u8 *buff; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); - if (!map || !bpf_map__is_internal(map)) { - error_cnt++; + if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) return; - } map_fd = bpf_map__fd(map); - if (map_fd < 0) { - error_cnt++; + if (CHECK_FAIL(map_fd < 0)) return; - } buff = malloc(bpf_map__def(map)->value_size); if (buff) diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c new file mode 100644 index 000000000000..7507c8f689bc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +struct meta { + int ifindex; + __u32 cb32_0; + __u8 
cb8_0; +}; + +static union { + __u32 cb32[5]; + __u8 cb8[20]; +} cb = { + .cb32[0] = 0x81828384, +}; + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + struct meta *meta = (struct meta *)data; + struct ipv6_packet *pkt_v6 = data + sizeof(*meta); + int duration = 0; + + if (CHECK(size != 72 + sizeof(*meta), "check_size", "size %u != %zu\n", + size, 72 + sizeof(*meta))) + return; + if (CHECK(meta->ifindex != 1, "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) + /* spurious kfree_skb not on loopback device */ + return; + if (CHECK(meta->cb8_0 != cb.cb8[0], "check_cb8_0", "cb8_0 %x != %x\n", + meta->cb8_0, cb.cb8[0])) + return; + if (CHECK(meta->cb32_0 != cb.cb32[0], "check_cb32_0", + "cb32_0 %x != %x\n", + meta->cb32_0, cb.cb32[0])) + return; + if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth", + "h_proto %x\n", pkt_v6->eth.h_proto)) + return; + if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip", + "iph.nexthdr %x\n", pkt_v6->iph.nexthdr)) + return; + if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp", + "tcp.doff %x\n", pkt_v6->tcp.doff)) + return; + + *(bool *)ctx = true; +} + +void test_kfree_skb(void) +{ + struct __sk_buff skb = {}; + struct bpf_prog_test_run_attr tattr = { + .data_in = &pkt_v6, + .data_size_in = sizeof(pkt_v6), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + }; + struct bpf_prog_load_attr attr = { + .file = "./kfree_skb.o", + }; + + struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL; + struct bpf_map *perf_buf_map, *global_data; + struct bpf_program *prog, *fentry, *fexit; + struct bpf_object *obj, *obj2 = NULL; + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb = NULL; + int err, kfree_skb_fd; + bool passed = false; + __u32 duration = 0; + const int zero = 0; + bool test_ok[2]; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &tattr.prog_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + + err = 
bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + goto close_prog; + + prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); + if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) + goto close_prog; + fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans"); + if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n")) + goto close_prog; + fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans"); + if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) + goto close_prog; + + global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); + if (CHECK(!global_data, "find global data", "not found\n")) + goto close_prog; + + link = bpf_program__attach_raw_tracepoint(prog, NULL); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + goto close_prog; + link_fentry = bpf_program__attach_trace(fentry); + if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n", + PTR_ERR(link_fentry))) + goto close_prog; + link_fexit = bpf_program__attach_trace(fexit); + if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n", + PTR_ERR(link_fexit))) + goto close_prog; + + perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); + if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) + goto close_prog; + + /* set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto close_prog; + + memcpy(skb.cb, &cb, sizeof(cb)); + err = bpf_prog_test_run_xattr(&tattr); + duration = tattr.duration; + CHECK(err || tattr.retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, tattr.retval, duration); + + /* read perf buffer */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto close_prog; + 
+ /* make sure kfree_skb program was triggered + * and it sent expected skb into ring buffer + */ + CHECK_FAIL(!passed); + + err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); + if (CHECK(err, "get_result", + "failed to get output data: %d\n", err)) + goto close_prog; + + CHECK_FAIL(!test_ok[0] || !test_ok[1]); +close_prog: + perf_buffer__free(pb); + if (!IS_ERR_OR_NULL(link)) + bpf_link__destroy(link); + if (!IS_ERR_OR_NULL(link_fentry)) + bpf_link__destroy(link_fentry); + if (!IS_ERR_OR_NULL(link_fexit)) + bpf_link__destroy(link_fexit); + bpf_object__close(obj); + bpf_object__close(obj2); +} diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 20ddca830e68..eaf64595be88 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -30,10 +30,8 @@ static void test_l4lb(const char *file) u32 *magic = (u32 *)buf; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_fd = bpf_find_map(__func__, obj, "vip_map"); if (map_fd < 0) @@ -72,10 +70,9 @@ static void test_l4lb(const char *file) bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { - error_cnt++; + if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || + pkts != NUM_ITER * 2)) printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts); - } out: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index ee99368c595c..8f91f1881d11 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -8,14 +8,12 @@ static void *parallel_map_access(void *arg) for (i = 0; i < 10000; i++) { err = bpf_map_lookup_elem_flags(map_fd, &key, vars, BPF_F_LOCK); - if (err) { + if (CHECK_FAIL(err)) { 
printf("lookup failed\n"); - error_cnt++; goto out; } - if (vars[0] != 0) { + if (CHECK_FAIL(vars[0] != 0)) { printf("lookup #%d var[0]=%d\n", i, vars[0]); - error_cnt++; goto out; } rnd = vars[1]; @@ -24,7 +22,7 @@ static void *parallel_map_access(void *arg) continue; printf("lookup #%d var[1]=%d var[%d]=%d\n", i, rnd, j, vars[j]); - error_cnt++; + CHECK_FAIL(vars[j] != rnd); goto out; } } @@ -42,34 +40,36 @@ void test_map_lock(void) void *ret; err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (err) { + if (CHECK_FAIL(err)) { printf("test_map_lock:bpf_prog_load errno %d\n", errno); goto close_prog; } map_fd[0] = bpf_find_map(__func__, obj, "hash_map"); - if (map_fd[0] < 0) + if (CHECK_FAIL(map_fd[0] < 0)) goto close_prog; map_fd[1] = bpf_find_map(__func__, obj, "array_map"); - if (map_fd[1] < 0) + if (CHECK_FAIL(map_fd[1] < 0)) goto close_prog; bpf_map_update_elem(map_fd[0], &key, vars, BPF_F_LOCK); for (i = 0; i < 4; i++) - assert(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd) == 0); + if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd))) + goto close_prog; for (i = 4; i < 6; i++) - assert(pthread_create(&thread_id[i], NULL, - &parallel_map_access, &map_fd[i - 4]) == 0); + if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, + &parallel_map_access, + &map_fd[i - 4]))) + goto close_prog; for (i = 0; i < 4; i++) - assert(pthread_join(thread_id[i], &ret) == 0 && - ret == (void *)&prog_fd); + if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || + ret != (void *)&prog_fd)) + goto close_prog; for (i = 4; i < 6; i++) - assert(pthread_join(thread_id[i], &ret) == 0 && - ret == (void *)&map_fd[i - 4]); - goto close_prog_noerr; + if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || + ret != (void *)&map_fd[i - 4])) + goto close_prog; close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c new
file mode 100644 index 000000000000..16a814eb4d64 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <sys/mman.h> +#include "test_mmap.skel.h" + +struct map_data { + __u64 val[512 * 4]; +}; + +static size_t roundup_page(size_t sz) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + return (sz + page_size - 1) / page_size * page_size; +} + +void test_mmap(void) +{ + const size_t bss_sz = roundup_page(sizeof(struct test_mmap__bss)); + const size_t map_sz = roundup_page(sizeof(struct map_data)); + const int zero = 0, one = 1, two = 2, far = 1500; + const long page_size = sysconf(_SC_PAGE_SIZE); + int err, duration = 0, i, data_map_fd; + struct bpf_map *data_map, *bss_map; + void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; + struct test_mmap__bss *bss_data; + struct map_data *map_data; + struct test_mmap *skel; + __u64 val = 0; + + + skel = test_mmap__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) + return; + + bss_map = skel->maps.bss; + data_map = skel->maps.data_map; + data_map_fd = bpf_map__fd(data_map); + + bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + bpf_map__fd(bss_map), 0); + if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap", + ".bss mmap failed: %d\n", errno)) { + bss_mmaped = NULL; + goto cleanup; + } + /* map as R/W first */ + map_mmaped = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + data_map_fd, 0); + if (CHECK(map_mmaped == MAP_FAILED, "data_mmap", + "data_map mmap failed: %d\n", errno)) { + map_mmaped = NULL; + goto cleanup; + } + + bss_data = bss_mmaped; + map_data = map_mmaped; + + CHECK_FAIL(bss_data->in_val); + CHECK_FAIL(bss_data->out_val); + CHECK_FAIL(skel->bss->in_val); + CHECK_FAIL(skel->bss->out_val); + CHECK_FAIL(map_data->val[0]); + CHECK_FAIL(map_data->val[1]); + CHECK_FAIL(map_data->val[2]); + CHECK_FAIL(map_data->val[far]); + + err = 
test_mmap__attach(skel); + if (CHECK(err, "attach_raw_tp", "err %d\n", err)) + goto cleanup; + + bss_data->in_val = 123; + val = 111; + CHECK_FAIL(bpf_map_update_elem(data_map_fd, &zero, &val, 0)); + + usleep(1); + + CHECK_FAIL(bss_data->in_val != 123); + CHECK_FAIL(bss_data->out_val != 123); + CHECK_FAIL(skel->bss->in_val != 123); + CHECK_FAIL(skel->bss->out_val != 123); + CHECK_FAIL(map_data->val[0] != 111); + CHECK_FAIL(map_data->val[1] != 222); + CHECK_FAIL(map_data->val[2] != 123); + CHECK_FAIL(map_data->val[far] != 3 * 123); + + CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &zero, &val)); + CHECK_FAIL(val != 111); + CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &one, &val)); + CHECK_FAIL(val != 222); + CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &two, &val)); + CHECK_FAIL(val != 123); + CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &far, &val)); + CHECK_FAIL(val != 3 * 123); + + /* data_map freeze should fail due to R/W mmap() */ + err = bpf_map_freeze(data_map_fd); + if (CHECK(!err || errno != EBUSY, "no_freeze", + "data_map freeze succeeded: err=%d, errno=%d\n", err, errno)) + goto cleanup; + + /* unmap R/W mapping */ + err = munmap(map_mmaped, map_sz); + map_mmaped = NULL; + if (CHECK(err, "data_map_munmap", "data_map munmap failed: %d\n", errno)) + goto cleanup; + + /* re-map as R/O now */ + map_mmaped = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0); + if (CHECK(map_mmaped == MAP_FAILED, "data_mmap", + "data_map R/O mmap failed: %d\n", errno)) { + map_mmaped = NULL; + goto cleanup; + } + map_data = map_mmaped; + + /* map/unmap in a loop to test ref counting */ + for (i = 0; i < 10; i++) { + int flags = i % 2 ? 
PROT_READ : PROT_WRITE; + void *p; + + p = mmap(NULL, map_sz, flags, MAP_SHARED, data_map_fd, 0); + if (CHECK_FAIL(p == MAP_FAILED)) + goto cleanup; + err = munmap(p, map_sz); + if (CHECK_FAIL(err)) + goto cleanup; + } + + /* data_map freeze should now succeed due to no R/W mapping */ + err = bpf_map_freeze(data_map_fd); + if (CHECK(err, "freeze", "data_map freeze failed: err=%d, errno=%d\n", + err, errno)) + goto cleanup; + + /* mapping as R/W now should fail */ + tmp1 = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED, + data_map_fd, 0); + if (CHECK(tmp1 != MAP_FAILED, "data_mmap", "mmap succeeded\n")) { + munmap(tmp1, map_sz); + goto cleanup; + } + + bss_data->in_val = 321; + usleep(1); + CHECK_FAIL(bss_data->in_val != 321); + CHECK_FAIL(bss_data->out_val != 321); + CHECK_FAIL(skel->bss->in_val != 321); + CHECK_FAIL(skel->bss->out_val != 321); + CHECK_FAIL(map_data->val[0] != 111); + CHECK_FAIL(map_data->val[1] != 222); + CHECK_FAIL(map_data->val[2] != 321); + CHECK_FAIL(map_data->val[far] != 3 * 321); + + /* check some more advanced mmap() manipulations */ + + /* map all but last page: pages 1-3 mapped */ + tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED, + data_map_fd, 0); + if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno)) + goto cleanup; + + /* unmap second page: pages 1, 3 mapped */ + err = munmap(tmp1 + page_size, page_size); + if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) { + munmap(tmp1, map_sz); + goto cleanup; + } + + /* map page 2 back */ + tmp2 = mmap(tmp1 + page_size, page_size, PROT_READ, + MAP_SHARED | MAP_FIXED, data_map_fd, 0); + if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) { + munmap(tmp1, page_size); + munmap(tmp1 + 2*page_size, page_size); + goto cleanup; + } + CHECK(tmp1 + page_size != tmp2, "adv_mmap4", + "tmp1: %p, tmp2: %p\n", tmp1, tmp2); + + /* re-map all 4 pages */ + tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED, + data_map_fd, 0); + if (CHECK(tmp2 == MAP_FAILED, 
"adv_mmap5", "errno %d\n", errno)) { + munmap(tmp1, 3 * page_size); /* unmap page 1 */ + goto cleanup; + } + CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2); + + map_data = tmp2; + CHECK_FAIL(bss_data->in_val != 321); + CHECK_FAIL(bss_data->out_val != 321); + CHECK_FAIL(skel->bss->in_val != 321); + CHECK_FAIL(skel->bss->out_val != 321); + CHECK_FAIL(map_data->val[0] != 111); + CHECK_FAIL(map_data->val[1] != 222); + CHECK_FAIL(map_data->val[2] != 321); + CHECK_FAIL(map_data->val[far] != 3 * 321); + + munmap(tmp2, 4 * page_size); +cleanup: + if (bss_mmaped) + CHECK_FAIL(munmap(bss_mmaped, bss_sz)); + if (map_mmaped) + CHECK_FAIL(munmap(map_mmaped, map_sz)); + test_mmap__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 3003fddc0613..1450ea2dd4cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -4,6 +4,7 @@ #include <sched.h> #include <sys/socket.h> #include <test_progs.h> +#include "bpf/libbpf_internal.h" static void on_sample(void *ctx, int cpu, void *data, __u32 size) { @@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void test_perf_buffer(void) { - int err, prog_fd, nr_cpus, i, duration = 0; + int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; const char *prog_name = "kprobe/sys_nanosleep"; const char *file = "./test_perf_buffer.o"; struct perf_buffer_opts pb_opts = {}; @@ -29,15 +30,27 @@ void test_perf_buffer(void) struct bpf_object *obj; struct perf_buffer *pb; struct bpf_link *link; + bool *online; nr_cpus = libbpf_num_possible_cpus(); if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus)) return; + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", + &online, &on_len); + if (CHECK(err, "nr_on_cpus", "err %d\n", err)) + return; + + for (i = 0; i < on_len; i++) + if (online[i]) + nr_on_cpus++; + /* load 
program */ err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) - return; + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out_close; + } prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) @@ -64,6 +77,11 @@ void test_perf_buffer(void) /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) { + printf("skipping offline CPU #%d\n", i); + continue; + } + CPU_ZERO(&cpu_set); CPU_SET(i, &cpu_set); @@ -81,8 +99,8 @@ void test_perf_buffer(void) if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) goto out_free_pb; - if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt", - "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen))) + if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt", + "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; out_free_pb: @@ -91,4 +109,5 @@ out_detach: bpf_link__destroy(link); out_close: bpf_object__close(obj); + free(online); } diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c new file mode 100644 index 000000000000..041952524c55 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <test_progs.h> + +__u32 get_map_id(struct bpf_object *obj, const char *name) +{ + struct bpf_map_info map_info = {}; + __u32 map_info_len, duration = 0; + struct bpf_map *map; + int err; + + map_info_len = sizeof(map_info); + + map = bpf_object__find_map_by_name(obj, name); + if (CHECK(!map, "find map", "NULL map")) + return 0; + + err = bpf_obj_get_info_by_fd(bpf_map__fd(map), + &map_info, &map_info_len); + CHECK(err, "get map info", "err %d errno %d", err, errno); + 
return map_info.id; +} + +void test_pinning(void) +{ + const char *file_invalid = "./test_pinning_invalid.o"; + const char *custpinpath = "/sys/fs/bpf/custom/pinmap"; + const char *nopinpath = "/sys/fs/bpf/nopinmap"; + const char *nopinpath2 = "/sys/fs/bpf/nopinmap2"; + const char *custpath = "/sys/fs/bpf/custom"; + const char *pinpath = "/sys/fs/bpf/pinmap"; + const char *file = "./test_pinning.o"; + __u32 map_id, map_id2, duration = 0; + struct stat statbuf = {}; + struct bpf_object *obj; + struct bpf_map *map; + int err; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .pin_root_path = custpath, + ); + + /* check that opening fails with invalid pinning value in map def */ + obj = bpf_object__open_file(file_invalid, NULL); + err = libbpf_get_error(obj); + if (CHECK(err != -EINVAL, "invalid open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + /* open the valid object file */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + err = bpf_object__load(obj); + if (CHECK(err, "default load", "err %d errno %d\n", err, errno)) + goto out; + + /* check that pinmap was pinned */ + err = stat(pinpath, &statbuf); + if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno)) + goto out; + + /* check that nopinmap was *not* pinned */ + err = stat(nopinpath, &statbuf); + if (CHECK(!err || errno != ENOENT, "stat nopinpath", + "err %d errno %d\n", err, errno)) + goto out; + + /* check that nopinmap2 was *not* pinned */ + err = stat(nopinpath2, &statbuf); + if (CHECK(!err || errno != ENOENT, "stat nopinpath2", + "err %d errno %d\n", err, errno)) + goto out; + + map_id = get_map_id(obj, "pinmap"); + if (!map_id) + goto out; + + bpf_object__close(obj); + + obj = bpf_object__open_file(file, NULL); + if (CHECK_FAIL(libbpf_get_error(obj))) { + obj = NULL; + goto out; + } + + err = bpf_object__load(obj); + if (CHECK(err, "default 
load", "err %d errno %d\n", err, errno)) + goto out; + + /* check that same map ID was reused for second load */ + map_id2 = get_map_id(obj, "pinmap"); + if (CHECK(map_id != map_id2, "check reuse", + "err %d errno %d id %d id2 %d\n", err, errno, map_id, map_id2)) + goto out; + + /* should be no-op to re-pin same map */ + map = bpf_object__find_map_by_name(obj, "pinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto out; + + err = bpf_map__pin(map, NULL); + if (CHECK(err, "re-pin map", "err %d errno %d\n", err, errno)) + goto out; + + /* but error to pin at different location */ + err = bpf_map__pin(map, "/sys/fs/bpf/other"); + if (CHECK(!err, "pin map different", "err %d errno %d\n", err, errno)) + goto out; + + /* unpin maps with a pin_path set */ + err = bpf_object__unpin_maps(obj, NULL); + if (CHECK(err, "unpin maps", "err %d errno %d\n", err, errno)) + goto out; + + /* and re-pin them... */ + err = bpf_object__pin_maps(obj, NULL); + if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) + goto out; + + /* set pinning path of other map and re-pin all */ + map = bpf_object__find_map_by_name(obj, "nopinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto out; + + err = bpf_map__set_pin_path(map, custpinpath); + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto out; + + /* should only pin the one unpinned map */ + err = bpf_object__pin_maps(obj, NULL); + if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) + goto out; + + /* check that nopinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + /* remove the custom pin path to re-test it with auto-pinning below */ + err = unlink(custpinpath); + if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + err = rmdir(custpath); + if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + 
/* open the valid object file again */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + /* set pin paths so that nopinmap2 will attempt to reuse the map at + * pinpath (which will fail), but not before pinmap has already been + * reused + */ + bpf_object__for_each_map(map, obj) { + if (!strcmp(bpf_map__name(map), "nopinmap")) + err = bpf_map__set_pin_path(map, nopinpath2); + else if (!strcmp(bpf_map__name(map), "nopinmap2")) + err = bpf_map__set_pin_path(map, pinpath); + else + continue; + + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto out; + } + + /* should fail because of map parameter mismatch */ + err = bpf_object__load(obj); + if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno)) + goto out; + + /* nopinmap2 should have been pinned and cleaned up again */ + err = stat(nopinpath2, &statbuf); + if (CHECK(!err || errno != ENOENT, "stat nopinpath2", + "err %d errno %d\n", err, errno)) + goto out; + + /* pinmap should still be there */ + err = stat(pinpath, &statbuf); + if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* test auto-pinning at custom path with open opt */ + obj = bpf_object__open_file(file, &opts); + if (CHECK_FAIL(libbpf_get_error(obj))) { + obj = NULL; + goto out; + } + + err = bpf_object__load(obj); + if (CHECK(err, "custom load", "err %d errno %d\n", err, errno)) + goto out; + + /* check that pinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto out; + +out: + unlink(pinpath); + unlink(nopinpath); + unlink(nopinpath2); + unlink(custpinpath); + rmdir(custpath); + if (obj) + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c 
b/tools/testing/selftests/bpf/prog_tests/pkt_access.c index 4ecfd721a044..a2537dfa899c 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c @@ -9,10 +9,8 @@ void test_pkt_access(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c index ac0d43435806..5f7aea605019 100644 --- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c +++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c @@ -9,10 +9,8 @@ void test_pkt_md_access(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c new file mode 100644 index 000000000000..7aecfd9e87d1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_probe_user(void) +{ + const char *prog_name = "kprobe/__sys_connect"; + const char *obj_file = "./test_probe_user.o"; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); + int err, results_map_fd, sock_fd, duration = 0; + struct sockaddr curr, orig, tmp; + struct sockaddr_in *in = (struct sockaddr_in *)&curr; + struct bpf_link *kprobe_link = NULL; + struct bpf_program *kprobe_prog; + struct bpf_object *obj; + static const int zero = 0; + + obj = bpf_object__open_file(obj_file, &opts); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + return; + + 
kprobe_prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!kprobe_prog, "find_probe", + "prog '%s' not found\n", prog_name)) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup; + + results_map_fd = bpf_find_map(__func__, obj, "test_pro.bss"); + if (CHECK(results_map_fd < 0, "find_bss_map", + "err %d\n", results_map_fd)) + goto cleanup; + + kprobe_link = bpf_program__attach(kprobe_prog); + if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", + "err %ld\n", PTR_ERR(kprobe_link))) { + kprobe_link = NULL; + goto cleanup; + } + + memset(&curr, 0, sizeof(curr)); + in->sin_family = AF_INET; + in->sin_port = htons(5555); + in->sin_addr.s_addr = inet_addr("255.255.255.255"); + memcpy(&orig, &curr, sizeof(curr)); + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK(sock_fd < 0, "create_sock_fd", "err %d\n", sock_fd)) + goto cleanup; + + connect(sock_fd, &curr, sizeof(curr)); + close(sock_fd); + + err = bpf_map_lookup_elem(results_map_fd, &zero, &tmp); + if (CHECK(err, "get_kprobe_res", + "failed to get kprobe res: %d\n", err)) + goto cleanup; + + in = (struct sockaddr_in *)&tmp; + if (CHECK(memcmp(&tmp, &orig, sizeof(orig)), "check_kprobe_res", + "wrong kprobe res from probe read: %s:%u\n", + inet_ntoa(in->sin_addr), ntohs(in->sin_port))) + goto cleanup; + + memset(&tmp, 0xab, sizeof(tmp)); + + in = (struct sockaddr_in *)&curr; + if (CHECK(memcmp(&curr, &tmp, sizeof(tmp)), "check_kprobe_res", + "wrong kprobe res from probe write: %s:%u\n", + inet_ntoa(in->sin_addr), ntohs(in->sin_port))) + goto cleanup; +cleanup: + bpf_link__destroy(kprobe_link); + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c index e60cd5ff1f55..faccc66f4e39 100644 --- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c +++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c @@ -27,10 +27,8 @@ static void 
test_queue_stack_map_by_type(int type) return; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_in_fd = bpf_find_map(__func__, obj, "map_in"); if (map_in_fd < 0) @@ -43,10 +41,8 @@ static void test_queue_stack_map_by_type(int type) /* Push 32 elements to the input map */ for (i = 0; i < MAP_SIZE; i++) { err = bpf_map_update_elem(map_in_fd, NULL, &vals[i], 0); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) goto out; - } } /* The eBPF program pushes iph.saddr in the output map, diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c new file mode 100644 index 000000000000..563e12120e77 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +struct bss { + unsigned did_run; + unsigned iters; + unsigned sum; +}; + +struct rdonly_map_subtest { + const char *subtest_name; + const char *prog_name; + unsigned exp_iters; + unsigned exp_sum; +}; + +void test_rdonly_maps(void) +{ + const char *file = "test_rdonly_maps.o"; + struct rdonly_map_subtest subtests[] = { + { "skip loop", "skip_loop", 0, 0 }, + { "part loop", "part_loop", 3, 2 + 3 + 4 }, + { "full loop", "full_loop", 4, 2 + 3 + 4 + 5 }, + }; + int i, err, zero = 0, duration = 0; + struct bpf_link *link = NULL; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct bpf_object *obj; + struct bss bss; + + obj = bpf_object__open_file(file, NULL); + if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) + return; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) + goto cleanup; + + bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss"); + if (CHECK(!bss_map, "find_bss_map", "failed\n")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(subtests); i++) { + const struct rdonly_map_subtest *t = &subtests[i]; + + 
if (!test__start_subtest(t->subtest_name)) + continue; + + prog = bpf_object__find_program_by_name(obj, t->prog_name); + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", + t->prog_name)) + goto cleanup; + + memset(&bss, 0, sizeof(bss)); + err = bpf_map_update_elem(bpf_map__fd(bss_map), &zero, &bss, 0); + if (CHECK(err, "set_bss", "failed to set bss data: %d\n", err)) + goto cleanup; + + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); + if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n", + t->prog_name, PTR_ERR(link))) { + link = NULL; + goto cleanup; + } + + /* trigger probe */ + usleep(1); + + bpf_link__destroy(link); + link = NULL; + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, &bss); + if (CHECK(err, "get_bss", "failed to get bss data: %d\n", err)) + goto cleanup; + if (CHECK(bss.did_run == 0, "check_run", + "prog '%s' didn't run?\n", t->prog_name)) + goto cleanup; + if (CHECK(bss.iters != t->exp_iters, "check_iters", + "prog '%s' iters: %d, expected: %d\n", + t->prog_name, bss.iters, t->exp_iters)) + goto cleanup; + if (CHECK(bss.sum != t->exp_sum, "check_sum", + "prog '%s' sum: %d, expected: %d\n", + t->prog_name, bss.sum, t->exp_sum)) + goto cleanup; + } + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 5633be43828f..fc0d7f4f02cf 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -1,28 +1,27 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> -static int libbpf_debug_print(enum libbpf_print_level level, - const char *format, va_list args) -{ - if (level == LIBBPF_DEBUG) - return 0; - - return vfprintf(stderr, format, args); -} - void test_reference_tracking(void) { - const char *file = "./test_sk_lookup_kern.o"; + const char *file = "test_sk_lookup_kern.o"; + 
const char *obj_name = "ref_track"; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .object_name = obj_name, + .relaxed_maps = true, + ); struct bpf_object *obj; struct bpf_program *prog; __u32 duration = 0; int err = 0; - obj = bpf_object__open(file); - if (IS_ERR(obj)) { - error_cnt++; + obj = bpf_object__open_file(file, &open_opts); + if (CHECK_FAIL(IS_ERR(obj))) return; - } + + if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", + "wrong obj name '%s', expected '%s'\n", + bpf_object__name(obj), obj_name)) + goto cleanup; bpf_object__for_each_program(prog, obj) { const char *title; @@ -32,17 +31,22 @@ void test_reference_tracking(void) if (strstr(title, ".text") != NULL) continue; - bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS); + if (!test__start_subtest(title)) + continue; /* Expect verifier failure if test name has 'fail' */ if (strstr(title, "fail") != NULL) { - libbpf_set_print(NULL); + libbpf_print_fn_t old_print_fn; + + old_print_fn = libbpf_set_print(NULL); err = !bpf_program__load(prog, "GPL", 0); - libbpf_set_print(libbpf_debug_print); + libbpf_set_print(old_print_fn); } else { err = bpf_program__load(prog, "GPL", 0); } CHECK(err, title, "\n"); } + +cleanup: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c index 29833aeaf0de..9d9351dc2ded 100644 --- a/tools/testing/selftests/bpf/test_section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook +#include <test_progs.h> -#include <err.h> -#include <bpf/libbpf.h> - -#include "bpf_util.h" +static int duration = 0; struct sec_name_test { const char sec_name[32]; @@ -20,19 +18,23 @@ struct sec_name_test { }; static struct sec_name_test tests[] = { - {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} }, - {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} }, + {"InvAliD", {-ESRCH, 0, 0}, {-EINVAL, 0} }, + 
{"cgroup", {-ESRCH, 0, 0}, {-EINVAL, 0} }, {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} }, {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"uprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"uretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} }, {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} }, {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} }, + {"tp/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} }, { "raw_tracepoint/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0}, }, + {"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} }, {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} }, {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} }, {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} }, @@ -146,7 +148,7 @@ static struct sec_name_test tests[] = { }, }; -static int test_prog_type_by_name(const struct sec_name_test *test) +static void test_prog_type_by_name(const struct sec_name_test *test) { enum bpf_attach_type expected_attach_type; enum bpf_prog_type prog_type; @@ -155,79 +157,47 @@ static int test_prog_type_by_name(const struct sec_name_test *test) rc = libbpf_prog_type_by_name(test->sec_name, &prog_type, &expected_attach_type); - if (rc != test->expected_load.rc) { - warnx("prog: unexpected rc=%d for %s", rc, test->sec_name); - return -1; - } + CHECK(rc != test->expected_load.rc, "check_code", + "prog: unexpected rc=%d for %s", rc, test->sec_name); if (rc) - return 0; - - if (prog_type != test->expected_load.prog_type) { - warnx("prog: unexpected prog_type=%d for %s", prog_type, - test->sec_name); - return -1; - } + return; - if (expected_attach_type != test->expected_load.expected_attach_type) { - warnx("prog: unexpected expected_attach_type=%d for %s", - expected_attach_type, test->sec_name); - return -1; - } + CHECK(prog_type != 
test->expected_load.prog_type, "check_prog_type", + "prog: unexpected prog_type=%d for %s", + prog_type, test->sec_name); - return 0; + CHECK(expected_attach_type != test->expected_load.expected_attach_type, + "check_attach_type", "prog: unexpected expected_attach_type=%d for %s", + expected_attach_type, test->sec_name); } -static int test_attach_type_by_name(const struct sec_name_test *test) +static void test_attach_type_by_name(const struct sec_name_test *test) { enum bpf_attach_type attach_type; int rc; rc = libbpf_attach_type_by_name(test->sec_name, &attach_type); - if (rc != test->expected_attach.rc) { - warnx("attach: unexpected rc=%d for %s", rc, test->sec_name); - return -1; - } + CHECK(rc != test->expected_attach.rc, "check_ret", + "attach: unexpected rc=%d for %s", rc, test->sec_name); if (rc) - return 0; - - if (attach_type != test->expected_attach.attach_type) { - warnx("attach: unexpected attach_type=%d for %s", attach_type, - test->sec_name); - return -1; - } + return; - return 0; + CHECK(attach_type != test->expected_attach.attach_type, + "check_attach_type", "attach: unexpected attach_type=%d for %s", + attach_type, test->sec_name); } -static int run_test_case(const struct sec_name_test *test) +void test_section_names(void) { - if (test_prog_type_by_name(test)) - return -1; - if (test_attach_type_by_name(test)) - return -1; - return 0; -} - -static int run_tests(void) -{ - int passes = 0; - int fails = 0; int i; for (i = 0; i < ARRAY_SIZE(tests); ++i) { - if (run_test_case(&tests[i])) - ++fails; - else - ++passes; - } - printf("Summary: %d PASSED, %d FAILED\n", passes, fails); - return fails ? 
-1 : 0; -} + struct sec_name_test *test = &tests[i]; -int main(int argc, char **argv) -{ - return run_tests(); + test_prog_type_by_name(test); + test_attach_type_by_name(test); + } } diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 7566c13eb51a..098bcae5f827 100644 --- a/tools/testing/selftests/bpf/test_select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -20,8 +20,11 @@ #include <bpf/libbpf.h> #include "bpf_rlimit.h" #include "bpf_util.h" + +#include "test_progs.h" #include "test_select_reuseport_common.h" +#define MAX_TEST_NAME 80 #define MIN_TCPHDR_LEN 20 #define UDPHDR_LEN 8 @@ -30,13 +33,13 @@ #define REUSEPORT_ARRAY_SIZE 32 static int result_map, tmp_index_ovr_map, linum_map, data_check_map; -static enum result expected_results[NR_RESULTS]; +static __u32 expected_results[NR_RESULTS]; static int sk_fds[REUSEPORT_ARRAY_SIZE]; -static int reuseport_array, outer_map; +static int reuseport_array = -1, outer_map = -1; static int select_by_skb_data_prog; -static int saved_tcp_syncookie; +static int saved_tcp_syncookie = -1; static struct bpf_object *obj; -static int saved_tcp_fo; +static int saved_tcp_fo = -1; static __u32 index_zero; static int epfd; @@ -46,16 +49,21 @@ static union sa46 { sa_family_t family; } srv_sa; -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - exit(-1); \ +#define RET_IF(condition, tag, format...) ({ \ + if (CHECK_FAIL(condition)) { \ + printf(tag " " format); \ + return; \ + } \ +}) + +#define RET_ERR(condition, tag, format...) 
({ \ + if (CHECK_FAIL(condition)) { \ + printf(tag " " format); \ + return -1; \ } \ }) -static void create_maps(void) +static int create_maps(void) { struct bpf_create_map_attr attr = {}; @@ -67,8 +75,8 @@ static void create_maps(void) attr.max_entries = REUSEPORT_ARRAY_SIZE; reuseport_array = bpf_create_map_xattr(&attr); - CHECK(reuseport_array == -1, "creating reuseport_array", - "reuseport_array:%d errno:%d\n", reuseport_array, errno); + RET_ERR(reuseport_array == -1, "creating reuseport_array", + "reuseport_array:%d errno:%d\n", reuseport_array, errno); /* Creating outer_map */ attr.name = "outer_map"; @@ -78,63 +86,61 @@ static void create_maps(void) attr.max_entries = 1; attr.inner_map_fd = reuseport_array; outer_map = bpf_create_map_xattr(&attr); - CHECK(outer_map == -1, "creating outer_map", - "outer_map:%d errno:%d\n", outer_map, errno); + RET_ERR(outer_map == -1, "creating outer_map", + "outer_map:%d errno:%d\n", outer_map, errno); + + return 0; } -static void prepare_bpf_obj(void) +static int prepare_bpf_obj(void) { struct bpf_program *prog; struct bpf_map *map; int err; - struct bpf_object_open_attr attr = { - .file = "test_select_reuseport_kern.o", - .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, - }; - - obj = bpf_object__open_xattr(&attr); - CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o", - "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj)); - prog = bpf_program__next(NULL, obj); - CHECK(!prog, "get first bpf_program", "!prog\n"); - bpf_program__set_type(prog, attr.prog_type); + obj = bpf_object__open("test_select_reuseport_kern.o"); + RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o", + "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj)); map = bpf_object__find_map_by_name(obj, "outer_map"); - CHECK(!map, "find outer_map", "!map\n"); + RET_ERR(!map, "find outer_map", "!map\n"); err = bpf_map__reuse_fd(map, outer_map); - CHECK(err, "reuse outer_map", "err:%d\n", err); + RET_ERR(err, "reuse outer_map", "err:%d\n", err); err = 
bpf_object__load(obj); - CHECK(err, "load bpf_object", "err:%d\n", err); + RET_ERR(err, "load bpf_object", "err:%d\n", err); + prog = bpf_program__next(NULL, obj); + RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); - CHECK(select_by_skb_data_prog == -1, "get prog fd", - "select_by_skb_data_prog:%d\n", select_by_skb_data_prog); + RET_ERR(select_by_skb_data_prog == -1, "get prog fd", + "select_by_skb_data_prog:%d\n", select_by_skb_data_prog); map = bpf_object__find_map_by_name(obj, "result_map"); - CHECK(!map, "find result_map", "!map\n"); + RET_ERR(!map, "find result_map", "!map\n"); result_map = bpf_map__fd(map); - CHECK(result_map == -1, "get result_map fd", - "result_map:%d\n", result_map); + RET_ERR(result_map == -1, "get result_map fd", + "result_map:%d\n", result_map); map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map"); - CHECK(!map, "find tmp_index_ovr_map", "!map\n"); + RET_ERR(!map, "find tmp_index_ovr_map\n", "!map"); tmp_index_ovr_map = bpf_map__fd(map); - CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd", - "tmp_index_ovr_map:%d\n", tmp_index_ovr_map); + RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd", + "tmp_index_ovr_map:%d\n", tmp_index_ovr_map); map = bpf_object__find_map_by_name(obj, "linum_map"); - CHECK(!map, "find linum_map", "!map\n"); + RET_ERR(!map, "find linum_map", "!map\n"); linum_map = bpf_map__fd(map); - CHECK(linum_map == -1, "get linum_map fd", - "linum_map:%d\n", linum_map); + RET_ERR(linum_map == -1, "get linum_map fd", + "linum_map:%d\n", linum_map); map = bpf_object__find_map_by_name(obj, "data_check_map"); - CHECK(!map, "find data_check_map", "!map\n"); + RET_ERR(!map, "find data_check_map", "!map\n"); data_check_map = bpf_map__fd(map); - CHECK(data_check_map == -1, "get data_check_map fd", - "data_check_map:%d\n", data_check_map); + RET_ERR(data_check_map == -1, "get data_check_map fd", + "data_check_map:%d\n", data_check_map); + + return 0; } static 
void sa46_init_loopback(union sa46 *sa, sa_family_t family) @@ -163,65 +169,73 @@ static int read_int_sysctl(const char *sysctl) int fd, ret; fd = open(sysctl, 0); - CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n", - sysctl, fd, errno); + RET_ERR(fd == -1, "open(sysctl)", + "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno); ret = read(fd, buf, sizeof(buf)); - CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n", - sysctl, ret, errno); - close(fd); + RET_ERR(ret <= 0, "read(sysctl)", + "sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno); + close(fd); return atoi(buf); } -static void write_int_sysctl(const char *sysctl, int v) +static int write_int_sysctl(const char *sysctl, int v) { int fd, ret, size; char buf[16]; fd = open(sysctl, O_RDWR); - CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n", - sysctl, fd, errno); + RET_ERR(fd == -1, "open(sysctl)", + "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno); size = snprintf(buf, sizeof(buf), "%d", v); ret = write(fd, buf, size); - CHECK(ret != size, "write(sysctl)", - "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno); + RET_ERR(ret != size, "write(sysctl)", + "sysctl:%s ret:%d size:%d errno:%d\n", + sysctl, ret, size, errno); + close(fd); + return 0; } static void restore_sysctls(void) { - write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo); - write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie); + if (saved_tcp_fo != -1) + write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo); + if (saved_tcp_syncookie != -1) + write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie); } -static void enable_fastopen(void) +static int enable_fastopen(void) { int fo; fo = read_int_sysctl(TCP_FO_SYSCTL); - write_int_sysctl(TCP_FO_SYSCTL, fo | 7); + if (fo < 0) + return -1; + + return write_int_sysctl(TCP_FO_SYSCTL, fo | 7); } -static void enable_syncookie(void) +static int enable_syncookie(void) { - write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2); + return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2); } 
-static void disable_syncookie(void) +static int disable_syncookie(void) { - write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0); + return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0); } -static __u32 get_linum(void) +static long get_linum(void) { __u32 linum; int err; err = bpf_map_lookup_elem(linum_map, &index_zero, &linum); - CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n", - err, errno); + RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n", + err, errno); return linum; } @@ -237,12 +251,12 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, addrlen = sizeof(cli_sa); err = getsockname(cli_fd, (struct sockaddr *)&cli_sa, &addrlen); - CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n", - err, errno); + RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n", + err, errno); err = bpf_map_lookup_elem(data_check_map, &index_zero, &result); - CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n", - err, errno); + RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n", + err, errno); if (type == SOCK_STREAM) { expected.len = MIN_TCPHDR_LEN; @@ -284,22 +298,42 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, printf("expected: (0x%x, %u, %u)\n", expected.eth_protocol, expected.ip_protocol, expected.bind_inany); - CHECK(1, "data_check result != expected", - "bpf_prog_linum:%u\n", get_linum()); + RET_IF(1, "data_check result != expected", + "bpf_prog_linum:%ld\n", get_linum()); } - CHECK(!result.hash, "data_check result.hash empty", - "result.hash:%u", result.hash); + RET_IF(!result.hash, "data_check result.hash empty", + "result.hash:%u", result.hash); expected.len += cmd ? 
sizeof(*cmd) : 0; if (type == SOCK_STREAM) - CHECK(expected.len > result.len, "expected.len > result.len", - "expected.len:%u result.len:%u bpf_prog_linum:%u\n", - expected.len, result.len, get_linum()); + RET_IF(expected.len > result.len, "expected.len > result.len", + "expected.len:%u result.len:%u bpf_prog_linum:%ld\n", + expected.len, result.len, get_linum()); else - CHECK(expected.len != result.len, "expected.len != result.len", - "expected.len:%u result.len:%u bpf_prog_linum:%u\n", - expected.len, result.len, get_linum()); + RET_IF(expected.len != result.len, "expected.len != result.len", + "expected.len:%u result.len:%u bpf_prog_linum:%ld\n", + expected.len, result.len, get_linum()); +} + +static const char *result_to_str(enum result res) +{ + switch (res) { + case DROP_ERR_INNER_MAP: + return "DROP_ERR_INNER_MAP"; + case DROP_ERR_SKB_DATA: + return "DROP_ERR_SKB_DATA"; + case DROP_ERR_SK_SELECT_REUSEPORT: + return "DROP_ERR_SK_SELECT_REUSEPORT"; + case DROP_MISC: + return "DROP_MISC"; + case PASS: + return "PASS"; + case PASS_ERR_SK_SELECT_REUSEPORT: + return "PASS_ERR_SK_SELECT_REUSEPORT"; + default: + return "UNKNOWN"; + } } static void check_results(void) @@ -310,8 +344,8 @@ static void check_results(void) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &results[i]); - CHECK(err == -1, "lookup_elem(result_map)", - "i:%u err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "lookup_elem(result_map)", + "i:%u err:%d errno:%d\n", i, err, errno); } for (i = 0; i < NR_RESULTS; i++) { @@ -337,10 +371,10 @@ static void check_results(void) printf(", %u", expected_results[i]); printf("]\n"); - CHECK(expected_results[broken] != results[broken], - "unexpected result", - "expected_results[%u] != results[%u] bpf_prog_linum:%u\n", - broken, broken, get_linum()); + printf("mismatch on %s (bpf_prog_linum:%ld)\n", result_to_str(broken), + get_linum()); + + CHECK_FAIL(true); } static int send_data(int type, sa_family_t family, void *data, 
size_t len, @@ -350,17 +384,17 @@ static int send_data(int type, sa_family_t family, void *data, size_t len, int fd, err; fd = socket(family, type, 0); - CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); + RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); sa46_init_loopback(&cli_sa, family); err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa)); - CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); + RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa, sizeof(srv_sa)); - CHECK(err != len && expected >= PASS, - "sendto()", "family:%u err:%d errno:%d expected:%d\n", - family, err, errno, expected); + RET_ERR(err != len && expected >= PASS, + "sendto()", "family:%u err:%d errno:%d expected:%d\n", + family, err, errno, expected); return fd; } @@ -375,47 +409,49 @@ static void do_test(int type, sa_family_t family, struct cmd *cmd, cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0, expected); + if (cli_fd < 0) + return; nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0); - CHECK((nev <= 0 && expected >= PASS) || - (nev > 0 && expected < PASS), - "nev <> expected", - "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n", - nev, expected, type, family, - cmd ? cmd->reuseport_index : -1, - cmd ? cmd->pass_on_failure : -1); + RET_IF((nev <= 0 && expected >= PASS) || + (nev > 0 && expected < PASS), + "nev <> expected", + "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n", + nev, expected, type, family, + cmd ? cmd->reuseport_index : -1, + cmd ? 
cmd->pass_on_failure : -1); check_results(); check_data(type, family, cmd, cli_fd); if (expected < PASS) return; - CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT && - cmd->reuseport_index != ev.data.u32, - "check cmd->reuseport_index", - "cmd:(%u, %u) ev.data.u32:%u\n", - cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32); + RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT && + cmd->reuseport_index != ev.data.u32, + "check cmd->reuseport_index", + "cmd:(%u, %u) ev.data.u32:%u\n", + cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32); srv_fd = sk_fds[ev.data.u32]; if (type == SOCK_STREAM) { int new_fd = accept(srv_fd, NULL, 0); - CHECK(new_fd == -1, "accept(srv_fd)", - "ev.data.u32:%u new_fd:%d errno:%d\n", - ev.data.u32, new_fd, errno); + RET_IF(new_fd == -1, "accept(srv_fd)", + "ev.data.u32:%u new_fd:%d errno:%d\n", + ev.data.u32, new_fd, errno); nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT); - CHECK(nread != sizeof(rcv_cmd), - "recv(new_fd)", - "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", - ev.data.u32, nread, sizeof(rcv_cmd), errno); + RET_IF(nread != sizeof(rcv_cmd), + "recv(new_fd)", + "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", + ev.data.u32, nread, sizeof(rcv_cmd), errno); close(new_fd); } else { nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT); - CHECK(nread != sizeof(rcv_cmd), - "recv(sk_fds)", - "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", - ev.data.u32, nread, sizeof(rcv_cmd), errno); + RET_IF(nread != sizeof(rcv_cmd), + "recv(sk_fds)", + "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", + ev.data.u32, nread, sizeof(rcv_cmd), errno); } close(cli_fd); @@ -428,18 +464,14 @@ static void test_err_inner_map(int type, sa_family_t family) .pass_on_failure = 0, }; - printf("%s: ", __func__); expected_results[DROP_ERR_INNER_MAP]++; do_test(type, family, &cmd, DROP_ERR_INNER_MAP); - printf("OK\n"); } static void test_err_skb_data(int type, sa_family_t family) { - printf("%s: 
", __func__); expected_results[DROP_ERR_SKB_DATA]++; do_test(type, family, NULL, DROP_ERR_SKB_DATA); - printf("OK\n"); } static void test_err_sk_select_port(int type, sa_family_t family) @@ -449,10 +481,8 @@ static void test_err_sk_select_port(int type, sa_family_t family) .pass_on_failure = 0, }; - printf("%s: ", __func__); expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++; do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT); - printf("OK\n"); } static void test_pass(int type, sa_family_t family) @@ -460,14 +490,12 @@ static void test_pass(int type, sa_family_t family) struct cmd cmd; int i; - printf("%s: ", __func__); cmd.pass_on_failure = 0; for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) { expected_results[PASS]++; cmd.reuseport_index = i; do_test(type, family, &cmd, PASS); } - printf("OK\n"); } static void test_syncookie(int type, sa_family_t family) @@ -481,7 +509,6 @@ static void test_syncookie(int type, sa_family_t family) if (type != SOCK_STREAM) return; - printf("%s: ", __func__); /* * +1 for TCP-SYN and * +1 for the TCP-ACK (ack the syncookie) @@ -497,17 +524,16 @@ static void test_syncookie(int type, sa_family_t family) */ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &tmp_index, BPF_ANY); - CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)", + "err:%d errno:%d\n", err, errno); do_test(type, family, &cmd, PASS); err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero, &tmp_index); - CHECK(err == -1 || tmp_index != -1, - "lookup_elem(tmp_index_ovr_map)", - "err:%d errno:%d tmp_index:%d\n", - err, errno, tmp_index); + RET_IF(err == -1 || tmp_index != -1, + "lookup_elem(tmp_index_ovr_map)", + "err:%d errno:%d tmp_index:%d\n", + err, errno, tmp_index); disable_syncookie(); - printf("OK\n"); } static void test_pass_on_err(int type, sa_family_t family) @@ -517,10 +543,8 @@ static void test_pass_on_err(int type, sa_family_t family) .pass_on_failure 
= 1, }; - printf("%s: ", __func__); expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1; do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT); - printf("OK\n"); } static void test_detach_bpf(int type, sa_family_t family) @@ -532,46 +556,47 @@ static void test_detach_bpf(int type, sa_family_t family) struct cmd cmd = {}; int optvalue = 0; - printf("%s: ", __func__); err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF, &optvalue, sizeof(optvalue)); - CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)", + "err:%d errno:%d\n", err, errno); err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF, &optvalue, sizeof(optvalue)); - CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == 0 || errno != ENOENT, + "setsockopt(SO_DETACH_REUSEPORT_BPF)", + "err:%d errno:%d\n", err, errno); for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - CHECK(err == -1, "lookup_elem(result_map)", - "i:%u err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "lookup_elem(result_map)", + "i:%u err:%d errno:%d\n", i, err, errno); nr_run_before += tmp; } cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS); + if (cli_fd < 0) + return; nev = epoll_wait(epfd, &ev, 1, 5); - CHECK(nev <= 0, "nev <= 0", - "nev:%d expected:1 type:%d family:%d data:(0, 0)\n", - nev, type, family); + RET_IF(nev <= 0, "nev <= 0", + "nev:%d expected:1 type:%d family:%d data:(0, 0)\n", + nev, type, family); for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - CHECK(err == -1, "lookup_elem(result_map)", - "i:%u err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "lookup_elem(result_map)", + "i:%u err:%d errno:%d\n", i, err, errno); nr_run_after += tmp; } - CHECK(nr_run_before != nr_run_after, - "nr_run_before != nr_run_after", - "nr_run_before:%u 
nr_run_after:%u\n", - nr_run_before, nr_run_after); + RET_IF(nr_run_before != nr_run_after, + "nr_run_before != nr_run_after", + "nr_run_before:%u nr_run_after:%u\n", + nr_run_before, nr_run_after); - printf("OK\n"); close(cli_fd); #else - printf("%s: SKIP\n", __func__); + test__skip(); #endif } @@ -594,149 +619,220 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) */ for (i = first; i >= 0; i--) { sk_fds[i] = socket(family, type, 0); - CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n", - i, sk_fds[i], errno); + RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n", + i, sk_fds[i], errno); err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)); - CHECK(err == -1, "setsockopt(SO_REUSEPORT)", - "sk_fds[%d] err:%d errno:%d\n", - i, err, errno); + RET_IF(err == -1, "setsockopt(SO_REUSEPORT)", + "sk_fds[%d] err:%d errno:%d\n", + i, err, errno); if (i == first) { err = setsockopt(sk_fds[i], SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &select_by_skb_data_prog, sizeof(select_by_skb_data_prog)); - CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", + "err:%d errno:%d\n", err, errno); } err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen); - CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n", - i, err, errno); + RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n", + i, err, errno); if (type == SOCK_STREAM) { err = listen(sk_fds[i], 10); - CHECK(err == -1, "listen()", - "sk_fds[%d] err:%d errno:%d\n", - i, err, errno); + RET_IF(err == -1, "listen()", + "sk_fds[%d] err:%d errno:%d\n", + i, err, errno); } err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i], BPF_NOEXIST); - CHECK(err == -1, "update_elem(reuseport_array)", - "sk_fds[%d] err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "update_elem(reuseport_array)", + "sk_fds[%d] err:%d errno:%d\n", i, err, errno); if (i == first) { 
socklen_t addrlen = sizeof(srv_sa); err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa, &addrlen); - CHECK(err == -1, "getsockname()", - "sk_fds[%d] err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "getsockname()", + "sk_fds[%d] err:%d errno:%d\n", i, err, errno); } } epfd = epoll_create(1); - CHECK(epfd == -1, "epoll_create(1)", - "epfd:%d errno:%d\n", epfd, errno); + RET_IF(epfd == -1, "epoll_create(1)", + "epfd:%d errno:%d\n", epfd, errno); ev.events = EPOLLIN; for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) { ev.data.u32 = i; err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i); + RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i); } } -static void setup_per_test(int type, unsigned short family, bool inany) +static void setup_per_test(int type, sa_family_t family, bool inany, + bool no_inner_map) { int ovr = -1, err; prepare_sk_fds(type, family, inany); err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr, BPF_ANY); - CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)", + "err:%d errno:%d\n", err, errno); + + /* Install reuseport_array to outer_map? 
*/ + if (no_inner_map) + return; + + err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array, + BPF_ANY); + RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)", + "err:%d errno:%d\n", err, errno); } -static void cleanup_per_test(void) +static void cleanup_per_test(bool no_inner_map) { - int i, err; + int i, err, zero = 0; + + memset(expected_results, 0, sizeof(expected_results)); + + for (i = 0; i < NR_RESULTS; i++) { + err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY); + RET_IF(err, "reset elem in result_map", + "i:%u err:%d errno:%d\n", i, err, errno); + } + + err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY); + RET_IF(err, "reset line number in linum_map", "err:%d errno:%d\n", + err, errno); for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) close(sk_fds[i]); close(epfd); + /* Delete reuseport_array from outer_map? */ + if (no_inner_map) + return; + err = bpf_map_delete_elem(outer_map, &index_zero); - CHECK(err == -1, "delete_elem(outer_map)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "delete_elem(outer_map)", + "err:%d errno:%d\n", err, errno); } static void cleanup(void) { - close(outer_map); - close(reuseport_array); - bpf_object__close(obj); + if (outer_map != -1) + close(outer_map); + if (reuseport_array != -1) + close(reuseport_array); + if (obj) + bpf_object__close(obj); } -static void test_all(void) +static const char *family_str(sa_family_t family) { - /* Extra SOCK_STREAM to test bind_inany==true */ - const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM }; - const char * const type_strings[] = { "TCP", "UDP", "TCP" }; - const char * const family_strings[] = { "IPv6", "IPv4" }; - const unsigned short families[] = { AF_INET6, AF_INET }; - const bool bind_inany[] = { false, false, true }; - int t, f, err; - - for (f = 0; f < ARRAY_SIZE(families); f++) { - unsigned short family = families[f]; - - for (t = 0; t < ARRAY_SIZE(types); t++) { - bool inany = bind_inany[t]; - int type = types[t]; - - 
printf("######## %s/%s %s ########\n", - family_strings[f], type_strings[t], - inany ? " INANY " : "LOOPBACK"); - - setup_per_test(type, family, inany); - - test_err_inner_map(type, family); - - /* Install reuseport_array to the outer_map */ - err = bpf_map_update_elem(outer_map, &index_zero, - &reuseport_array, BPF_ANY); - CHECK(err == -1, "update_elem(outer_map)", - "err:%d errno:%d\n", err, errno); - - test_err_skb_data(type, family); - test_err_sk_select_port(type, family); - test_pass(type, family); - test_syncookie(type, family); - test_pass_on_err(type, family); - /* Must be the last test */ - test_detach_bpf(type, family); - - cleanup_per_test(); - printf("\n"); - } + switch (family) { + case AF_INET: + return "IPv4"; + case AF_INET6: + return "IPv6"; + default: + return "unknown"; + } +} + +static const char *sotype_str(int sotype) +{ + switch (sotype) { + case SOCK_STREAM: + return "TCP"; + case SOCK_DGRAM: + return "UDP"; + default: + return "unknown"; } } -int main(int argc, const char **argv) +#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ } + +static void test_config(int sotype, sa_family_t family, bool inany) { - create_maps(); - prepare_bpf_obj(); + const struct test { + void (*fn)(int sotype, sa_family_t family); + const char *name; + bool no_inner_map; + } tests[] = { + TEST_INIT(test_err_inner_map, true /* no_inner_map */), + TEST_INIT(test_err_skb_data), + TEST_INIT(test_err_sk_select_port), + TEST_INIT(test_pass), + TEST_INIT(test_syncookie), + TEST_INIT(test_pass_on_err), + TEST_INIT(test_detach_bpf), + }; + char s[MAX_TEST_NAME]; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s/%s %s %s", + family_str(family), sotype_str(sotype), + inany ? 
"INANY" : "LOOPBACK", t->name); + + if (!test__start_subtest(s)) + continue; + + setup_per_test(sotype, family, inany, t->no_inner_map); + t->fn(sotype, family); + cleanup_per_test(t->no_inner_map); + } +} + +#define BIND_INANY true + +static void test_all(void) +{ + const struct config { + int sotype; + sa_family_t family; + bool inany; + } configs[] = { + { SOCK_STREAM, AF_INET }, + { SOCK_STREAM, AF_INET, BIND_INANY }, + { SOCK_STREAM, AF_INET6 }, + { SOCK_STREAM, AF_INET6, BIND_INANY }, + { SOCK_DGRAM, AF_INET }, + { SOCK_DGRAM, AF_INET6 }, + }; + const struct config *c; + + for (c = configs; c < configs + ARRAY_SIZE(configs); c++) + test_config(c->sotype, c->family, c->inany); +} + +void test_select_reuseport(void) +{ + if (create_maps()) + goto out; + if (prepare_bpf_obj()) + goto out; + saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL); - enable_fastopen(); - disable_syncookie(); - atexit(restore_sysctls); + if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0) + goto out; - test_all(); + if (enable_fastopen()) + goto out; + if (disable_syncookie()) + goto out; + test_all(); +out: cleanup(); - return 0; + restore_sysctls(); } diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 54218ee3c004..504abb7bfb95 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_send_signal_kern.skel.h" static volatile int sigusr1_received = 0; @@ -8,28 +9,26 @@ static void sigusr1_handler(int signum) sigusr1_received++; } -static int test_send_signal_common(struct perf_event_attr *attr, - int prog_type, +static void test_send_signal_common(struct perf_event_attr *attr, + bool signal_thread, const char *test_name) { - int err = -1, pmu_fd, prog_fd, info_map_fd, status_map_fd; - const char 
*file = "./test_send_signal_kern.o"; - struct bpf_object *obj = NULL; + struct test_send_signal_kern *skel; int pipe_c2p[2], pipe_p2c[2]; - __u32 key = 0, duration = 0; + int err = -1, pmu_fd = -1; + __u32 duration = 0; char buf[256]; pid_t pid; - __u64 val; if (CHECK(pipe(pipe_c2p), test_name, "pipe pipe_c2p error: %s\n", strerror(errno))) - goto no_fork_done; + return; if (CHECK(pipe(pipe_p2c), test_name, "pipe pipe_p2c error: %s\n", strerror(errno))) { close(pipe_c2p[0]); close(pipe_c2p[1]); - goto no_fork_done; + return; } pid = fork(); @@ -38,7 +37,7 @@ static int test_send_signal_common(struct perf_event_attr *attr, close(pipe_c2p[1]); close(pipe_p2c[0]); close(pipe_p2c[1]); - goto no_fork_done; + return; } if (pid == 0) { @@ -73,45 +72,39 @@ static int test_send_signal_common(struct perf_event_attr *attr, close(pipe_c2p[1]); /* close write */ close(pipe_p2c[0]); /* close read */ - err = bpf_prog_load(file, prog_type, &obj, &prog_fd); - if (CHECK(err < 0, test_name, "bpf_prog_load error: %s\n", - strerror(errno))) - goto prog_load_failure; - - pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, - -1 /* group id */, 0 /* flags */); - if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", - strerror(errno))) { - err = -1; - goto close_prog; - } - - err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_enable error: %s\n", - strerror(errno))) - goto disable_pmu; - - err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd); - if (CHECK(err < 0, test_name, "ioctl perf_event_ioc_set_bpf error: %s\n", - strerror(errno))) - goto disable_pmu; + skel = test_send_signal_kern__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n")) + goto skel_open_load_failure; - err = -1; - info_map_fd = bpf_object__find_map_fd_by_name(obj, "info_map"); - if (CHECK(info_map_fd < 0, test_name, "find map %s error\n", "info_map")) - goto disable_pmu; + if (!attr) { + err = 
test_send_signal_kern__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) { + err = -1; + goto destroy_skel; + } + } else { + pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1, + -1 /* group id */, 0 /* flags */); + if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n", + strerror(errno))) { + err = -1; + goto destroy_skel; + } - status_map_fd = bpf_object__find_map_fd_by_name(obj, "status_map"); - if (CHECK(status_map_fd < 0, test_name, "find map %s error\n", "status_map")) - goto disable_pmu; + skel->links.send_signal_perf = + bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd); + if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event", + "err %ld\n", PTR_ERR(skel->links.send_signal_perf))) + goto disable_pmu; + } /* wait until child signal handler installed */ read(pipe_c2p[0], buf, 1); /* trigger the bpf send_signal */ - key = 0; - val = (((__u64)(SIGUSR1)) << 32) | pid; - bpf_map_update_elem(info_map_fd, &key, &val, 0); + skel->bss->pid = pid; + skel->bss->sig = SIGUSR1; + skel->bss->signal_thread = signal_thread; /* notify child that bpf program can send_signal now */ write(pipe_p2c[1], buf, 1); @@ -125,55 +118,27 @@ static int test_send_signal_common(struct perf_event_attr *attr, goto disable_pmu; } - err = CHECK(buf[0] != '2', test_name, "incorrect result\n"); + CHECK(buf[0] != '2', test_name, "incorrect result\n"); /* notify child safe to exit */ write(pipe_p2c[1], buf, 1); disable_pmu: close(pmu_fd); -close_prog: - bpf_object__close(obj); -prog_load_failure: +destroy_skel: + test_send_signal_kern__destroy(skel); +skel_open_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); wait(NULL); -no_fork_done: - return err; } -static int test_send_signal_tracepoint(void) +static void test_send_signal_tracepoint(bool signal_thread) { - const char *id_path = "/sys/kernel/debug/tracing/events/syscalls/sys_enter_nanosleep/id"; - struct perf_event_attr attr = { - .type = PERF_TYPE_TRACEPOINT, - 
.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN, - .sample_period = 1, - .wakeup_events = 1, - }; - __u32 duration = 0; - int bytes, efd; - char buf[256]; - - efd = open(id_path, O_RDONLY, 0); - if (CHECK(efd < 0, "tracepoint", - "open syscalls/sys_enter_nanosleep/id failure: %s\n", - strerror(errno))) - return -1; - - bytes = read(efd, buf, sizeof(buf)); - close(efd); - if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "tracepoint", - "read syscalls/sys_enter_nanosleep/id failure: %s\n", - strerror(errno))) - return -1; - - attr.config = strtol(buf, NULL, 0); - - return test_send_signal_common(&attr, BPF_PROG_TYPE_TRACEPOINT, "tracepoint"); + test_send_signal_common(NULL, signal_thread, "tracepoint"); } -static int test_send_signal_perf(void) +static void test_send_signal_perf(bool signal_thread) { struct perf_event_attr attr = { .sample_period = 1, @@ -181,15 +146,13 @@ static int test_send_signal_perf(void) .config = PERF_COUNT_SW_CPU_CLOCK, }; - return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, - "perf_sw_event"); + test_send_signal_common(&attr, signal_thread, "perf_sw_event"); } -static int test_send_signal_nmi(void) +static void test_send_signal_nmi(bool signal_thread) { struct perf_event_attr attr = { - .sample_freq = 50, - .freq = 1, + .sample_period = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; @@ -203,27 +166,30 @@ static int test_send_signal_nmi(void) if (pmu_fd == -1) { if (errno == ENOENT) { printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", - __func__); - return 0; + __func__); + test__skip(); + return; } /* Let the test fail with a more informative message */ } else { close(pmu_fd); } - return test_send_signal_common(&attr, BPF_PROG_TYPE_PERF_EVENT, - "perf_hw_event"); + test_send_signal_common(&attr, signal_thread, "perf_hw_event"); } void test_send_signal(void) { - int ret = 0; - - ret |= test_send_signal_tracepoint(); - ret |= test_send_signal_perf(); - ret |= test_send_signal_nmi(); - if (!ret) - 
printf("test_send_signal:OK\n"); - else - printf("test_send_signal:FAIL\n"); + if (test__start_subtest("send_signal_tracepoint")) + test_send_signal_tracepoint(false); + if (test__start_subtest("send_signal_perf")) + test_send_signal_perf(false); + if (test__start_subtest("send_signal_nmi")) + test_send_signal_nmi(false); + if (test__start_subtest("send_signal_tracepoint_thread")) + test_send_signal_tracepoint(true); + if (test__start_subtest("send_signal_perf_thread")) + test_send_signal_perf(true); + if (test__start_subtest("send_signal_nmi_thread")) + test_send_signal_nmi(true); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index e95baa32e277..c6d6b685a946 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -10,6 +10,10 @@ void test_skb_ctx(void) .cb[3] = 4, .cb[4] = 5, .priority = 6, + .tstamp = 7, + .wire_len = 100, + .gso_segs = 8, + .mark = 9, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, @@ -86,4 +90,12 @@ void test_skb_ctx(void) "ctx_out_priority", "skb->priority == %d, expected %d\n", skb.priority, 7); + CHECK_ATTR(skb.tstamp != 8, + "ctx_out_tstamp", + "skb->tstamp == %lld, expected %d\n", + skb.tstamp, 8); + CHECK_ATTR(skb.mark != 10, + "ctx_out_mark", + "skb->mark == %u, expected %d\n", + skb.mark, 10); } diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c new file mode 100644 index 000000000000..9264a2736018 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> + +struct s { + int a; + long long b; +} __attribute__((packed)); + +#include "test_skeleton.skel.h" + +void test_skeleton(void) +{ + int duration = 0, err; + struct test_skeleton* skel; + struct test_skeleton__bss *bss; + struct test_skeleton__kconfig 
*kcfg; + + skel = test_skeleton__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n")) + goto cleanup; + + err = test_skeleton__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + bss = skel->bss; + bss->in1 = 1; + bss->in2 = 2; + bss->in3 = 3; + bss->in4 = 4; + bss->in5.a = 5; + bss->in5.b = 6; + kcfg = skel->kconfig; + + err = test_skeleton__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1); + CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2); + CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3); + CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4); + CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n", + bss->handler_out5.a, 5); + CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n", + bss->handler_out5.b, 6); + + CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1", + "got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL); + CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2", + "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION); + +cleanup: + test_skeleton__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c new file mode 100644 index 000000000000..07f5b462c2ef --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare + +#include "test_progs.h" + +static int connected_socket_v4(void) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(80), + .sin_addr = { inet_addr("127.0.0.1") }, + }; + socklen_t len = sizeof(addr); + int s, 
repair, err; + + s = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK_FAIL(s == -1)) + goto error; + + repair = TCP_REPAIR_ON; + err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)); + if (CHECK_FAIL(err)) + goto error; + + err = connect(s, (struct sockaddr *)&addr, len); + if (CHECK_FAIL(err)) + goto error; + + repair = TCP_REPAIR_OFF_NO_WP; + err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair)); + if (CHECK_FAIL(err)) + goto error; + + return s; +error: + perror(__func__); + close(s); + return -1; +} + +/* Create a map, populate it with one socket, and free the map. */ +static void test_sockmap_create_update_free(enum bpf_map_type map_type) +{ + const int zero = 0; + int s, map, err; + + s = connected_socket_v4(); + if (CHECK_FAIL(s == -1)) + return; + + map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0); + if (CHECK_FAIL(map == -1)) { + perror("bpf_create_map"); + goto out; + } + + err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST); + if (CHECK_FAIL(err)) { + perror("bpf_map_update"); + goto out; + } + +out: + close(map); + close(s); +} + +void test_sockmap_basic(void) +{ + if (test__start_subtest("sockmap create_update_free")) + test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash create_update_free")) + test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH); +} diff --git a/tools/testing/selftests/bpf/test_sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index 23bd0819382d..3e8517a8395a 100644 --- a/tools/testing/selftests/bpf/test_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -1,22 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 - -#include <errno.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> - -#include <linux/filter.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_rlimit.h" -#include "bpf_util.h" +#include <test_progs.h> #include "cgroup_helpers.h" -#define CG_PATH 
"/sockopt" - static char bpf_log_buf[4096]; static bool verbose; @@ -983,39 +968,18 @@ close_prog_fd: return ret; } -int main(int args, char **argv) +void test_sockopt(void) { - int err = EXIT_FAILURE, error_cnt = 0; int cgroup_fd, i; - if (setup_cgroup_environment()) - goto cleanup_obj; - - cgroup_fd = create_and_get_cgroup(CG_PATH); - if (cgroup_fd < 0) - goto cleanup_cgroup_env; - - if (join_cgroup(CG_PATH)) - goto cleanup_cgroup; + cgroup_fd = test__join_cgroup("/sockopt"); + if (CHECK_FAIL(cgroup_fd < 0)) + return; for (i = 0; i < ARRAY_SIZE(tests); i++) { - int err = run_test(cgroup_fd, &tests[i]); - - if (err) - error_cnt++; - - printf("#%d %s: %s\n", i, err ? "FAIL" : "PASS", - tests[i].descr); + test__start_subtest(tests[i].descr); + CHECK_FAIL(run_test(cgroup_fd, &tests[i])); } - printf("Summary: %ld PASSED, %d FAILED\n", - ARRAY_SIZE(tests) - error_cnt, error_cnt); - err = error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; - -cleanup_cgroup: close(cgroup_fd); -cleanup_cgroup_env: - cleanup_cgroup_environment(); -cleanup_obj: - return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c new file mode 100644 index 000000000000..8547ecbdc61f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include "cgroup_helpers.h" + +#define SOL_CUSTOM 0xdeadbeef +#define CUSTOM_INHERIT1 0 +#define CUSTOM_INHERIT2 1 +#define CUSTOM_LISTENER 2 + +static int connect_to_server(int server_fd) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); + goto out; + } + + if (connect(fd, (const struct sockaddr *)&addr, len) < 0) { + log_err("Fail to connect 
to server"); + goto out; + } + + return fd; + +out: + close(fd); + return -1; +} + +static int verify_sockopt(int fd, int optname, const char *msg, char expected) +{ + socklen_t optlen = 1; + char buf = 0; + int err; + + err = getsockopt(fd, SOL_CUSTOM, optname, &buf, &optlen); + if (err) { + log_err("%s: failed to call getsockopt", msg); + return 1; + } + + printf("%s %d: got=0x%x ? expected=0x%x\n", msg, optname, buf, expected); + + if (buf != expected) { + log_err("%s: unexpected getsockopt value %d != %d", msg, + buf, expected); + return 1; + } + + return 0; +} + +static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; + +static void *server_thread(void *arg) +{ + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int fd = *(int *)arg; + int client_fd; + int err = 0; + + err = listen(fd, 1); + + pthread_mutex_lock(&server_started_mtx); + pthread_cond_signal(&server_started); + pthread_mutex_unlock(&server_started_mtx); + + if (CHECK_FAIL(err < 0)) { + perror("Failed to listed on socket"); + return NULL; + } + + err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1); + err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1); + err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1); + + client_fd = accept(fd, (struct sockaddr *)&addr, &len); + if (CHECK_FAIL(client_fd < 0)) { + perror("Failed to accept client"); + return NULL; + } + + err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1); + err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1); + err += verify_sockopt(client_fd, CUSTOM_LISTENER, "accept", 0); + + close(client_fd); + + return (void *)(long)err; +} + +static int start_server(void) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + }; + char buf; + int err; + int fd; + int i; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create server socket"); + 
return -1; + } + + for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) { + buf = 0x01; + err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1); + if (err) { + log_err("Failed to call setsockopt(%d)", i); + close(fd); + return -1; + } + } + + if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) { + log_err("Failed to bind socket"); + close(fd); + return -1; + } + + return fd; +} + +static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + int err; + + err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); + if (err) { + log_err("Failed to deduct types for %s BPF program", title); + return -1; + } + + prog = bpf_object__find_program_by_title(obj, title); + if (!prog) { + log_err("Failed to find %s BPF program", title); + return -1; + } + + err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, + attach_type, 0); + if (err) { + log_err("Failed to attach %s BPF program", title); + return -1; + } + + return 0; +} + +static void run_test(int cgroup_fd) +{ + struct bpf_prog_load_attr attr = { + .file = "./sockopt_inherit.o", + }; + int server_fd = -1, client_fd; + struct bpf_object *obj; + void *server_err; + pthread_t tid; + int ignored; + int err; + + err = bpf_prog_load_xattr(&attr, &obj, &ignored); + if (CHECK_FAIL(err)) + return; + + err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); + if (CHECK_FAIL(err)) + goto close_bpf_object; + + err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); + if (CHECK_FAIL(err)) + goto close_bpf_object; + + server_fd = start_server(); + if (CHECK_FAIL(server_fd < 0)) + goto close_bpf_object; + + if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, + (void *)&server_fd))) + goto close_server_fd; + + pthread_mutex_lock(&server_started_mtx); + pthread_cond_wait(&server_started, &server_started_mtx); + pthread_mutex_unlock(&server_started_mtx); + + client_fd = connect_to_server(server_fd); + 
if (CHECK_FAIL(client_fd < 0)) + goto close_server_fd; + + CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0)); + CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0)); + CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0)); + + pthread_join(tid, &server_err); + + err = (int)(long)server_err; + CHECK_FAIL(err); + + close(client_fd); + +close_server_fd: + close(server_fd); +close_bpf_object: + bpf_object__close(obj); +} + +void test_sockopt_inherit(void) +{ + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/sockopt_inherit"); + if (CHECK_FAIL(cgroup_fd < 0)) + return; + + run_test(cgroup_fd); + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/test_sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 4be3441db867..29188d6f5c8d 100644 --- a/tools/testing/selftests/bpf/test_sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -1,19 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 - -#include <error.h> -#include <errno.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> - -#include <linux/filter.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_rlimit.h" -#include "bpf_util.h" +#include <test_progs.h> #include "cgroup_helpers.h" static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) @@ -308,7 +294,7 @@ detach: return err; } -int main(int argc, char **argv) +void test_sockopt_multi(void) { struct bpf_prog_load_attr attr = { .file = "./sockopt_multi.o", @@ -319,56 +305,28 @@ int main(int argc, char **argv) int err = -1; int ignored; - if (setup_cgroup_environment()) { - log_err("Failed to setup cgroup environment\n"); - goto out; - } - - cg_parent = create_and_get_cgroup("/parent"); - if (cg_parent < 0) { - log_err("Failed to create cgroup /parent\n"); - goto out; - } - - cg_child = create_and_get_cgroup("/parent/child"); - if (cg_child < 0) { - 
log_err("Failed to create cgroup /parent/child\n"); + cg_parent = test__join_cgroup("/parent"); + if (CHECK_FAIL(cg_parent < 0)) goto out; - } - if (join_cgroup("/parent/child")) { - log_err("Failed to join cgroup /parent/child\n"); + cg_child = test__join_cgroup("/parent/child"); + if (CHECK_FAIL(cg_child < 0)) goto out; - } err = bpf_prog_load_xattr(&attr, &obj, &ignored); - if (err) { - log_err("Failed to load BPF object"); + if (CHECK_FAIL(err)) goto out; - } sock_fd = socket(AF_INET, SOCK_STREAM, 0); - if (sock_fd < 0) { - log_err("Failed to create socket"); + if (CHECK_FAIL(sock_fd < 0)) goto out; - } - if (run_getsockopt_test(obj, cg_parent, cg_child, sock_fd)) - err = -1; - printf("test_sockopt_multi: getsockopt %s\n", - err ? "FAILED" : "PASSED"); - - if (run_setsockopt_test(obj, cg_parent, cg_child, sock_fd)) - err = -1; - printf("test_sockopt_multi: setsockopt %s\n", - err ? "FAILED" : "PASSED"); + CHECK_FAIL(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd)); + CHECK_FAIL(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd)); out: close(sock_fd); bpf_object__close(obj); close(cg_child); close(cg_parent); - - printf("test_sockopt_multi: %s\n", err ? "FAILED" : "PASSED"); - return err ? 
EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 036b652e5ca9..2061a6beac0f 100644 --- a/tools/testing/selftests/bpf/test_sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -1,22 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 - -#include <errno.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> - -#include <linux/filter.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_rlimit.h" -#include "bpf_util.h" +#include <test_progs.h> #include "cgroup_helpers.h" -#define CG_PATH "/sockopt" - #define SOL_CUSTOM 0xdeadbeef static int getsetsockopt(void) @@ -25,6 +10,7 @@ static int getsetsockopt(void) union { char u8[4]; __u32 u32; + char cc[16]; /* TCP_CA_NAME_MAX */ } buf = {}; socklen_t optlen; @@ -115,6 +101,29 @@ static int getsetsockopt(void) goto err; } + /* TCP_CONGESTION can extend the string */ + + strcpy(buf.cc, "nv"); + err = setsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, strlen("nv")); + if (err) { + log_err("Failed to call setsockopt(TCP_CONGESTION)"); + goto err; + } + + + optlen = sizeof(buf.cc); + err = getsockopt(fd, SOL_TCP, TCP_CONGESTION, &buf, &optlen); + if (err) { + log_err("Failed to call getsockopt(TCP_CONGESTION)"); + goto err; + } + + if (strcmp(buf.cc, "cubic") != 0) { + log_err("Unexpected getsockopt(TCP_CONGESTION) %s != %s", + buf.cc, "cubic"); + goto err; + } + close(fd); return 0; err: @@ -151,7 +160,7 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title) return 0; } -static int run_test(int cgroup_fd) +static void run_test(int cgroup_fd) { struct bpf_prog_load_attr attr = { .file = "./sockopt_sk.o", @@ -161,51 +170,31 @@ static int run_test(int cgroup_fd) int err; err = bpf_prog_load_xattr(&attr, &obj, &ignored); - if (err) { - log_err("Failed to load BPF object"); - return -1; - } + if (CHECK_FAIL(err)) + 
return; err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt"); - if (err) + if (CHECK_FAIL(err)) goto close_bpf_object; err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt"); - if (err) + if (CHECK_FAIL(err)) goto close_bpf_object; - err = getsetsockopt(); + CHECK_FAIL(getsetsockopt()); close_bpf_object: bpf_object__close(obj); - return err; } -int main(int args, char **argv) +void test_sockopt_sk(void) { int cgroup_fd; - int err = EXIT_SUCCESS; - - if (setup_cgroup_environment()) - goto cleanup_obj; - - cgroup_fd = create_and_get_cgroup(CG_PATH); - if (cgroup_fd < 0) - goto cleanup_cgroup_env; - - if (join_cgroup(CG_PATH)) - goto cleanup_cgroup; - - if (run_test(cgroup_fd)) - err = EXIT_FAILURE; - printf("test_sockopt_sk: %s\n", - err == EXIT_SUCCESS ? "PASSED" : "FAILED"); + cgroup_fd = test__join_cgroup("/sockopt_sk"); + if (CHECK_FAIL(cgroup_fd < 0)) + return; -cleanup_cgroup: + run_test(cgroup_fd); close(cgroup_fd); -cleanup_cgroup_env: - cleanup_cgroup_environment(); -cleanup_obj: - return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 114ebe6a438e..1ae00cd3174e 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -11,19 +11,19 @@ void test_spinlock(void) void *ret; err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd); - if (err) { + if (CHECK_FAIL(err)) { printf("test_spin_lock:bpf_prog_load errno %d\n", errno); goto close_prog; } for (i = 0; i < 4; i++) - assert(pthread_create(&thread_id[i], NULL, - &spin_lock_thread, &prog_fd) == 0); + if (CHECK_FAIL(pthread_create(&thread_id[i], NULL, + &spin_lock_thread, &prog_fd))) + goto close_prog; + for (i = 0; i < 4; i++) - assert(pthread_join(thread_id[i], &ret) == 0 && - ret == (void *)&prog_fd); - goto close_prog_noerr; + if (CHECK_FAIL(pthread_join(thread_id[i], &ret) || + ret != (void *)&prog_fd)) + goto close_prog; close_prog: - error_cnt++; 
-close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index ac44fda84833..e8399ae50e77 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -1,16 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_stacktrace_build_id.skel.h" void test_stacktrace_build_id(void) { + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "tracepoint/random/urandom_read"; - const char *file = "./test_stacktrace_build_id.o"; - int err, prog_fd, stack_trace_len; + struct test_stacktrace_build_id *skel; + int err, stack_trace_len; __u32 key, previous_key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link = NULL; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -18,42 +16,24 @@ void test_stacktrace_build_id(void) int retry = 1; retry: - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = test_stacktrace_build_id__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) return; - prog = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - goto close_prog; - - link = bpf_program__attach_tracepoint(prog, "random", "urandom_read"); - if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link))) - goto close_prog; + err = test_stacktrace_build_id__attach(skel); + if (CHECK(err, "attach_tp", "err %d\n", err)) + goto cleanup; /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK(control_map_fd < 0, "bpf_find_map control_map", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackid_hmap_fd 
= bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", - err, errno)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") - == 0); - assert(system("./urandom_read") == 0); + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap); + + if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null"))) + goto cleanup; + if (CHECK_FAIL(system("./urandom_read"))) + goto cleanup; /* disable stack trace collection */ key = 0; val = 1; @@ -65,23 +45,23 @@ retry: err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. 
stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = extract_build_id(buf, 256); if (CHECK(err, "get build_id with readelf", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = bpf_map_get_next_key(stackmap_fd, NULL, &key); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; do { char build_id[64]; @@ -89,7 +69,7 @@ retry: err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && id_offs[i].offset != 0) { @@ -107,8 +87,7 @@ retry: * try it one more time. */ if (build_id_matches < 1 && retry--) { - bpf_link__destroy(link); - bpf_object__close(obj); + test_stacktrace_build_id__destroy(skel); printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", __func__); goto retry; @@ -116,17 +95,14 @@ retry: if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) - goto disable_pmu; + goto cleanup; - stack_trace_len = PERF_MAX_STACK_DEPTH - * sizeof(struct bpf_stack_build_id); + stack_trace_len = PERF_MAX_STACK_DEPTH * + sizeof(struct bpf_stack_build_id); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); CHECK(err, "compare_stack_ips stackmap vs. 
stack_amap", "err %d errno %d\n", err, errno); -disable_pmu: - bpf_link__destroy(link); - -close_prog: - bpf_object__close(obj); +cleanup: + test_stacktrace_build_id__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index 9557b7dfb782..f002e3090d92 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_stacktrace_build_id.skel.h" static __u64 read_perf_max_sample_freq(void) { @@ -16,19 +17,15 @@ static __u64 read_perf_max_sample_freq(void) void test_stacktrace_build_id_nmi(void) { - int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "tracepoint/random/urandom_read"; - const char *file = "./test_stacktrace_build_id.o"; - int err, pmu_fd, prog_fd; + int control_map_fd, stackid_hmap_fd, stackmap_fd; + struct test_stacktrace_build_id *skel; + int err, pmu_fd; struct perf_event_attr attr = { .freq = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; __u32 key, previous_key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -38,53 +35,46 @@ void test_stacktrace_build_id_nmi(void) attr.sample_freq = read_perf_max_sample_freq(); retry: - err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = test_stacktrace_build_id__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; - prog = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - goto close_prog; + /* override program type */ + 
bpf_program__set_perf_event(skel->progs.oncpu); + + err = test_stacktrace_build_id__load(skel); + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) + goto cleanup; pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, -1 /* group id */, 0 /* flags */); - if (CHECK(pmu_fd < 0, "perf_event_open", - "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", + if (pmu_fd < 0 && errno == ENOENT) { + printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__); + test__skip(); + goto cleanup; + } + if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd, errno)) - goto close_prog; + goto cleanup; - link = bpf_program__attach_perf_event(prog, pmu_fd); - if (CHECK(IS_ERR(link), "attach_perf_event", - "err %ld\n", PTR_ERR(link))) { + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", + "err %ld\n", PTR_ERR(skel->links.oncpu))) { close(pmu_fd); - goto close_prog; + goto cleanup; } /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK(control_map_fd < 0, "bpf_find_map control_map", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", - err, errno)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") - == 0); - assert(system("taskset 0x1 ./urandom_read 100000") == 0); + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = 
bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); + + if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null"))) + goto cleanup; + if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000"))) + goto cleanup; /* disable stack trace collection */ key = 0; val = 1; @@ -96,23 +86,23 @@ retry: err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = extract_build_id(buf, 256); if (CHECK(err, "get build_id with readelf", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = bpf_map_get_next_key(stackmap_fd, NULL, &key); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; do { char build_id[64]; @@ -120,7 +110,7 @@ retry: err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && id_offs[i].offset != 0) { @@ -138,8 +128,7 @@ retry: * try it one more time. */ if (build_id_matches < 1 && retry--) { - bpf_link__destroy(link); - bpf_object__close(obj); + test_stacktrace_build_id__destroy(skel); printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", __func__); goto retry; @@ -147,7 +136,7 @@ retry: if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) - goto disable_pmu; + goto cleanup; /* * We intentionally skip compare_stack_ips(). 
This is because we @@ -156,8 +145,6 @@ retry: * BPF_STACK_BUILD_ID_IP; */ -disable_pmu: - bpf_link__destroy(link); -close_prog: - bpf_object__close(obj); +cleanup: + test_stacktrace_build_id__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c index fc539335c5b3..37269d23df93 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c @@ -26,19 +26,19 @@ void test_stacktrace_map(void) /* find map fds */ control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (control_map_fd < 0) + if (CHECK_FAIL(control_map_fd < 0)) goto disable_pmu; stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (stackid_hmap_fd < 0) + if (CHECK_FAIL(stackid_hmap_fd < 0)) goto disable_pmu; stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (stackmap_fd < 0) + if (CHECK_FAIL(stackmap_fd < 0)) goto disable_pmu; stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (stack_amap_fd < 0) + if (CHECK_FAIL(stack_amap_fd < 0)) goto disable_pmu; /* give some time for bpf program run */ @@ -55,23 +55,20 @@ void test_stacktrace_map(void) err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu_noerr; + goto disable_pmu; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu_noerr; + goto disable_pmu; stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); if (CHECK(err, "compare_stack_ips stackmap vs. 
stack_amap", "err %d errno %d\n", err, errno)) - goto disable_pmu_noerr; + goto disable_pmu; - goto disable_pmu_noerr; disable_pmu: - error_cnt++; -disable_pmu_noerr: bpf_link__destroy(link); close_prog: bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c index fbfa8e76cf63..404a5498e1a3 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c @@ -26,15 +26,15 @@ void test_stacktrace_map_raw_tp(void) /* find map fds */ control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (control_map_fd < 0) + if (CHECK_FAIL(control_map_fd < 0)) goto close_prog; stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (stackid_hmap_fd < 0) + if (CHECK_FAIL(stackid_hmap_fd < 0)) goto close_prog; stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (stackmap_fd < 0) + if (CHECK_FAIL(stackmap_fd < 0)) goto close_prog; /* give some time for bpf program run */ @@ -58,10 +58,7 @@ void test_stacktrace_map_raw_tp(void) "err %d errno %d\n", err, errno)) goto close_prog; - goto close_prog_noerr; close_prog: - error_cnt++; -close_prog_noerr: if (!IS_ERR_OR_NULL(link)) bpf_link__destroy(link); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c new file mode 100644 index 000000000000..bb8fe646dd9f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +/* test_tailcall_1 checks basic functionality by patching multiple locations + * in a single program for a single tail call slot with nop->jmp, jmp->nop + * and jmp->jmp rewrites. Also checks for nop->nop. 
+ */ +static void test_tailcall_1(void) +{ + int err, map_fd, prog_fd, main_fd, i, j; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + char buff[128] = {}; + + err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != i, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, 
&i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + j = bpf_map__def(prog_array)->max_entries - 1 - i; + snprintf(prog_name, sizeof(prog_name), "classifier/%i", j); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + j = bpf_map__def(prog_array)->max_entries - 1 - i; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != j, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err >= 0 || errno != ENOENT)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 3, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + } + +out: + bpf_object__close(obj); +} + +/* test_tailcall_2 checks that patching multiple programs for a single + * tail call slot works. It also jumps through several programs and tests + * the tail call limit counter. 
+ */ +static void test_tailcall_2(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + char buff[128] = {}; + + err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 2; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_3 checks that the count value of the tail call limit + * enforcement matches 
with expectations. + */ +static void test_tailcall_3(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char buff[128] = {}; + + err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + prog = bpf_object__find_program_by_title(obj, "classifier/0"); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_4 checks that the kernel properly selects indirect jump + * for the case where 
the key is not known. Latter is passed via global + * data to select different targets we can compare return value of. + */ +static void test_tailcall_4(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + static const int zero = 0; + char buff[128] = {}; + char prog_name[32]; + + err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != i, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + } + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); + if 
(CHECK_FAIL(err)) + goto out; + + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 3, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + } +out: + bpf_object__close(obj); +} + +/* test_tailcall_5 probes similarly to test_tailcall_4 that the kernel generates + * an indirect jump when the keys are const but different from different branches. + */ +static void test_tailcall_5(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, key[] = { 1111, 1234, 5678 }; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + static const int zero = 0; + char buff[128] = {}; + char prog_name[32]; + + err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj, + &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + 
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != i, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + } + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 3, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + } +out: + bpf_object__close(obj); +} + +void test_tailcalls(void) +{ + if (test__start_subtest("tailcall_1")) + test_tailcall_1(); + if (test__start_subtest("tailcall_2")) + test_tailcall_2(); + if (test__start_subtest("tailcall_3")) + test_tailcall_3(); + if (test__start_subtest("tailcall_4")) + test_tailcall_4(); + if (test__start_subtest("tailcall_5")) + test_tailcall_5(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c index 958a3d88de99..1bdc1d86a50c 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c @@ -70,9 +70,6 @@ void test_task_fd_query_rawtp(void) if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len)) goto close_prog; - goto close_prog_noerr; close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c index f9b70e81682b..3f131b8fe328 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c @@ -62,14 +62,9 @@ static void 
test_task_fd_query_tp_core(const char *probe_name, fd_type, buf)) goto close_pmu; - close(pmu_fd); - goto close_prog_noerr; - close_pmu: close(pmu_fd); close_prog: - error_cnt++; -close_prog_noerr: bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c index bb8759d69099..594307dffd13 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c @@ -10,10 +10,8 @@ void test_tcp_estats(void) err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); CHECK(err, "", "err %d errno %d\n", err, errno); - if (err) { - error_cnt++; + if (err) return; - } bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/test_tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 90c3862f74a8..f4cd60d6fba2 100644 --- a/tools/testing/selftests/bpf/test_tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -1,23 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include <error.h> -#include <errno.h> -#include <stdio.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/socket.h> -#include <netinet/in.h> -#include <pthread.h> - -#include <linux/filter.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_rlimit.h" -#include "bpf_util.h" +#include <test_progs.h> #include "cgroup_helpers.h" -#define CG_PATH "/tcp_rtt" - struct tcp_rtt_storage { __u32 invoked; __u32 dsack_dups; @@ -30,8 +14,32 @@ static void send_byte(int fd) { char b = 0x55; - if (write(fd, &b, sizeof(b)) != 1) - error(1, errno, "Failed to send single byte"); + if (CHECK_FAIL(write(fd, &b, sizeof(b)) != 1)) + perror("Failed to send single byte"); +} + +static int wait_for_ack(int fd, int retries) +{ + struct tcp_info info; + socklen_t optlen; + int i, err; + + for (i = 0; i < retries; i++) { + optlen = sizeof(info); + err = getsockopt(fd, SOL_TCP, TCP_INFO, &info, &optlen); + if (err < 0) { + log_err("Failed to 
lookup TCP stats"); + return err; + } + + if (info.tcpi_unacked == 0) + return 0; + + usleep(10); + } + + log_err("Did not receive ACK"); + return -1; } static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, @@ -41,8 +49,10 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked, int err = 0; struct tcp_rtt_storage val; - if (bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0) - error(1, errno, "Failed to read socket storage"); + if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)) { + perror("Failed to read socket storage"); + return -1; + } if (val.invoked != invoked) { log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d", @@ -149,6 +159,11 @@ static int run_test(int cgroup_fd, int server_fd) /*icsk_retransmits=*/0); send_byte(client_fd); + if (wait_for_ack(client_fd, 100) < 0) { + err = -1; + goto close_client_fd; + } + err += verify_sk(map_fd, client_fd, "first payload byte", /*invoked=*/2, @@ -157,6 +172,7 @@ static int run_test(int cgroup_fd, int server_fd) /*delivered_ce=*/0, /*icsk_retransmits=*/0); +close_client_fd: close(client_fd); close_bpf_object: @@ -187,68 +203,72 @@ static int start_server(void) return fd; } +static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; + static void *server_thread(void *arg) { struct sockaddr_storage addr; socklen_t len = sizeof(addr); int fd = *(int *)arg; int client_fd; + int err; + + err = listen(fd, 1); + + pthread_mutex_lock(&server_started_mtx); + pthread_cond_signal(&server_started); + pthread_mutex_unlock(&server_started_mtx); - if (listen(fd, 1) < 0) - error(1, errno, "Failed to listed on socket"); + if (CHECK_FAIL(err < 0)) { + perror("Failed to listed on socket"); + return NULL; + } client_fd = accept(fd, (struct sockaddr *)&addr, &len); - if (client_fd < 0) - error(1, errno, "Failed to accept client"); + if (CHECK_FAIL(client_fd < 0)) { + perror("Failed to accept 
client"); + return NULL; + } /* Wait for the next connection (that never arrives) * to keep this thread alive to prevent calling * close() on client_fd. */ - if (accept(fd, (struct sockaddr *)&addr, &len) >= 0) - error(1, errno, "Unexpected success in second accept"); + if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) { + perror("Unexpected success in second accept"); + return NULL; + } close(client_fd); return NULL; } -int main(int args, char **argv) +void test_tcp_rtt(void) { int server_fd, cgroup_fd; - int err = EXIT_SUCCESS; pthread_t tid; - if (setup_cgroup_environment()) - goto cleanup_obj; - - cgroup_fd = create_and_get_cgroup(CG_PATH); - if (cgroup_fd < 0) - goto cleanup_cgroup_env; - - if (join_cgroup(CG_PATH)) - goto cleanup_cgroup; + cgroup_fd = test__join_cgroup("/tcp_rtt"); + if (CHECK_FAIL(cgroup_fd < 0)) + return; server_fd = start_server(); - if (server_fd < 0) { - err = EXIT_FAILURE; - goto cleanup_cgroup; - } + if (CHECK_FAIL(server_fd < 0)) + goto close_cgroup_fd; - pthread_create(&tid, NULL, server_thread, (void *)&server_fd); + if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread, + (void *)&server_fd))) + goto close_server_fd; - if (run_test(cgroup_fd, server_fd)) - err = EXIT_FAILURE; + pthread_mutex_lock(&server_started_mtx); + pthread_cond_wait(&server_started, &server_started_mtx); + pthread_mutex_unlock(&server_started_mtx); + CHECK_FAIL(run_test(cgroup_fd, server_fd)); +close_server_fd: close(server_fd); - - printf("test_sockopt_sk: %s\n", - err == EXIT_SUCCESS ? 
"PASSED" : "FAILED"); - -cleanup_cgroup: +close_cgroup_fd: close(cgroup_fd); -cleanup_cgroup_env: - cleanup_cgroup_environment(); -cleanup_obj: - return err; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c new file mode 100644 index 000000000000..25b068591e9a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> + +const char *err_str; +bool found; + +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + char *log_buf; + + if (level != LIBBPF_WARN || + strcmp(format, "libbpf: \n%s\n")) { + vprintf(format, args); + return 0; + } + + log_buf = va_arg(args, char *); + if (!log_buf) + goto out; + if (strstr(log_buf, err_str) == 0) + found = true; +out: + printf(format, log_buf); + return 0; +} + +extern int extra_prog_load_log_flags; + +static int check_load(const char *file) +{ + struct bpf_prog_load_attr attr; + struct bpf_object *obj = NULL; + int err, prog_fd; + + memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); + attr.file = file; + attr.prog_type = BPF_PROG_TYPE_UNSPEC; + attr.log_level = extra_prog_load_log_flags; + attr.prog_flags = BPF_F_TEST_RND_HI32; + found = false; + err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + bpf_object__close(obj); + return err; +} + +struct test_def { + const char *file; + const char *err_str; +}; + +void test_test_global_funcs(void) +{ + struct test_def tests[] = { + { "test_global_func1.o", "combined stack size of 4 calls is 544" }, + { "test_global_func2.o" }, + { "test_global_func3.o" , "the call stack of 8 frames" }, + { "test_global_func4.o" }, + { "test_global_func5.o" , "expected pointer to ctx, but got PTR" }, + { "test_global_func6.o" , "modified ctx ptr R2" }, + { "test_global_func7.o" , "foo() doesn't return scalar" }, + }; + libbpf_print_fn_t 
old_print_fn = NULL; + int err, i, duration = 0; + + old_print_fn = libbpf_set_print(libbpf_debug_print); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + const struct test_def *test = &tests[i]; + + if (!test__start_subtest(test->file)) + continue; + + err_str = test->err_str; + err = check_load(test->file); + CHECK_FAIL(!!err ^ !!err_str); + if (err_str) + CHECK(found, "", "expected string '%s'", err_str); + } + libbpf_set_print(old_print_fn); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c new file mode 100644 index 000000000000..465b371a561d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <sys/prctl.h> +#include <test_progs.h> + +#define MAX_CNT 100000 + +static __u64 time_get_ns(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts.tv_sec * 1000000000ull + ts.tv_nsec; +} + +static int test_task_rename(const char *prog) +{ + int i, fd, duration = 0, err; + char buf[] = "test_overhead"; + __u64 start_time; + + fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(fd < 0, "open /proc", "err %d", errno)) + return -1; + start_time = time_get_ns(); + for (i = 0; i < MAX_CNT; i++) { + err = write(fd, buf, sizeof(buf)); + if (err < 0) { + CHECK(err < 0, "task rename", "err %d", errno); + close(fd); + return -1; + } + } + printf("task_rename %s\t%lluK events per sec\n", prog, + MAX_CNT * 1000000ll / (time_get_ns() - start_time)); + close(fd); + return 0; +} + +static void test_run(const char *prog) +{ + test_task_rename(prog); +} + +static void setaffinity(void) +{ + cpu_set_t cpuset; + int cpu = 0; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); +} + +void test_test_overhead(void) +{ + const char *kprobe_name = "kprobe/__set_task_comm"; 
+ const char *kretprobe_name = "kretprobe/__set_task_comm"; + const char *raw_tp_name = "raw_tp/task_rename"; + const char *fentry_name = "fentry/__set_task_comm"; + const char *fexit_name = "fexit/__set_task_comm"; + const char *kprobe_func = "__set_task_comm"; + struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; + struct bpf_program *fentry_prog, *fexit_prog; + struct bpf_object *obj; + struct bpf_link *link; + int err, duration = 0; + char comm[16] = {}; + + if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) + return; + + obj = bpf_object__open_file("./test_overhead.o", NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + return; + + kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); + if (CHECK(!kprobe_prog, "find_probe", + "prog '%s' not found\n", kprobe_name)) + goto cleanup; + kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name); + if (CHECK(!kretprobe_prog, "find_probe", + "prog '%s' not found\n", kretprobe_name)) + goto cleanup; + raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name); + if (CHECK(!raw_tp_prog, "find_probe", + "prog '%s' not found\n", raw_tp_name)) + goto cleanup; + fentry_prog = bpf_object__find_program_by_title(obj, fentry_name); + if (CHECK(!fentry_prog, "find_probe", + "prog '%s' not found\n", fentry_name)) + goto cleanup; + fexit_prog = bpf_object__find_program_by_title(obj, fexit_name); + if (CHECK(!fexit_prog, "find_probe", + "prog '%s' not found\n", fexit_name)) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup; + + setaffinity(); + + /* base line run */ + test_run("base"); + + /* attach kprobe */ + link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */, + kprobe_func); + if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link))) + goto cleanup; + test_run("kprobe"); + bpf_link__destroy(link); + + /* attach kretprobe */ + link = 
bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */, + kprobe_func); + if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link))) + goto cleanup; + test_run("kretprobe"); + bpf_link__destroy(link); + + /* attach raw_tp */ + link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename"); + if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link))) + goto cleanup; + test_run("raw_tp"); + bpf_link__destroy(link); + + /* attach fentry */ + link = bpf_program__attach_trace(fentry_prog); + if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link))) + goto cleanup; + test_run("fentry"); + bpf_link__destroy(link); + + /* attach fexit */ + link = bpf_program__attach_trace(fexit_prog); + if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link))) + goto cleanup; + test_run("fexit"); + bpf_link__destroy(link); +cleanup: + prctl(PR_SET_NAME, comm, 0L, 0L, 0L); + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c new file mode 100644 index 000000000000..1f6ccdaed1ac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE +#include <sched.h> +#include <sys/prctl.h> +#include <test_progs.h> + +#define MAX_TRAMP_PROGS 40 + +struct inst { + struct bpf_object *obj; + struct bpf_link *link_fentry; + struct bpf_link *link_fexit; +}; + +static int test_task_rename(void) +{ + int fd, duration = 0, err; + char buf[] = "test_overhead"; + + fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(fd < 0, "open /proc", "err %d", errno)) + return -1; + err = write(fd, buf, sizeof(buf)); + if (err < 0) { + CHECK(err < 0, "task rename", "err %d", errno); + close(fd); + return -1; + } + close(fd); + return 0; +} + +static struct bpf_link *load(struct bpf_object *obj, const char *name) +{ + struct bpf_program *prog; + int 
duration = 0; + + prog = bpf_object__find_program_by_title(obj, name); + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name)) + return ERR_PTR(-EINVAL); + return bpf_program__attach_trace(prog); +} + +void test_trampoline_count(void) +{ + const char *fentry_name = "fentry/__set_task_comm"; + const char *fexit_name = "fexit/__set_task_comm"; + const char *object = "test_trampoline_count.o"; + struct inst inst[MAX_TRAMP_PROGS] = {}; + int err, i = 0, duration = 0; + struct bpf_object *obj; + struct bpf_link *link; + char comm[16] = {}; + + /* attach 'allowed' 40 trampoline programs */ + for (i = 0; i < MAX_TRAMP_PROGS; i++) { + obj = bpf_object__open_file(object, NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + goto cleanup; + + inst[i].obj = obj; /* record before load so a failed load is still closed at cleanup */ + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup; + + if (rand() % 2) { + link = load(obj, fentry_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + goto cleanup; + inst[i].link_fentry = link; + } else { + link = load(obj, fexit_name); + if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) + goto cleanup; + inst[i].link_fexit = link; + } + } + + /* and try 1 extra.. 
*/ + obj = bpf_object__open_file(object, NULL); + if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) + goto cleanup; + + err = bpf_object__load(obj); + if (CHECK(err, "obj_load", "err %d\n", err)) + goto cleanup_extra; + + /* ..that needs to fail */ + link = load(obj, fentry_name); + if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) { + bpf_link__destroy(link); + goto cleanup_extra; + } + + /* with E2BIG error */ + CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link)); + + /* and finally execute the probe */ + if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L))) + goto cleanup_extra; + CHECK_FAIL(test_task_rename()); + CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L)); + +cleanup_extra: + bpf_object__close(obj); +cleanup: + for (i = 0; i < MAX_TRAMP_PROGS; i++) { /* sweep every slot: inst[] is zero-initialized, so unfilled ones hold NULL */ + bpf_link__destroy(inst[i].link_fentry); + bpf_link__destroy(inst[i].link_fexit); + bpf_object__close(inst[i].obj); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c index a74167289545..dcb5ecac778e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp.c @@ -16,10 +16,8 @@ void test_xdp(void) int err, prog_fd, map_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_fd = bpf_find_map(__func__, obj, "vip2tnl"); if (map_fd < 0) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index 922aa0a19764..3744196d7cba 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -10,10 +10,8 @@ void test_xdp_adjust_tail(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) { - 
error_cnt++; + if (CHECK_FAIL(err)) return; - } err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, 
sizeof(pkt_v4), buf, &size, &retval, &duration); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c new file mode 100644 index 000000000000..6b56bdc73ebc --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <net/if.h> +#include "test_xdp.skel.h" +#include "test_xdp_bpf2bpf.skel.h" + +void test_xdp_bpf2bpf(void) +{ + __u32 duration = 0, retval, size; + char buf[128]; + int err, pkt_fd, map_fd; + struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); + struct iptnl_info value4 = {.family = AF_INET}; + struct test_xdp *pkt_skel = NULL; + struct test_xdp_bpf2bpf *ftrace_skel = NULL; + struct vip key4 = {.protocol = 6, .family = AF_INET}; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + + /* Load XDP program to introspect */ + pkt_skel = test_xdp__open_and_load(); + if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n")) + return; + + pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel); + + map_fd = bpf_map__fd(pkt_skel->maps.vip2tnl); + bpf_map_update_elem(map_fd, &key4, &value4, 0); + + /* Load trace program */ + opts.attach_prog_fd = pkt_fd, + ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts); + if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) + goto out; + + err = test_xdp_bpf2bpf__load(ftrace_skel); + if (CHECK(err, "__load", "ftrace skeleton failed\n")) + goto out; + + err = test_xdp_bpf2bpf__attach(ftrace_skel); + if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) + goto out; + + /* Run test program */ + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), + buf, &size, &retval, &duration); + + if (CHECK(err || retval != XDP_TX || size != 74 || + iph->protocol != IPPROTO_IPIP, "ipv4", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size)) + goto out; + + /* Verify test results */ + if 
(CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), + "result", "fentry failed err %llu\n", + ftrace_skel->bss->test_result_fentry)) + goto out; + + CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result", + "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); + +out: + test_xdp__destroy(pkt_skel); + test_xdp_bpf2bpf__destroy(ftrace_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index 09e6b46f5515..c9404e6b226e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -31,10 +31,8 @@ void test_xdp_noinline(void) u32 *magic = (u32 *)buf; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) { - error_cnt++; + if (CHECK_FAIL(err)) return; - } map_fd = bpf_find_map(__func__, obj, "vip_map"); if (map_fd < 0) @@ -73,9 +71,10 @@ void test_xdp_noinline(void) bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { - error_cnt++; - printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); + if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || + pkts != NUM_ITER * 2)) { + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", + bytes, pkts); } out: bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c new file mode 100644 index 000000000000..7185bee16fe4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_xdp_perf(void) +{ + const char *file = "./xdp_dummy.o"; + __u32 duration, retval, size; + struct bpf_object *obj; + char in[128], out[128]; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + err = bpf_prog_test_run(prog_fd, 1000000, 
&in[0], 128, + out, &size, &retval, &duration); + + CHECK(err || retval != XDP_PASS || size != 128, + "xdp-perf", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c new file mode 100644 index 000000000000..7897c8f4d363 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* WARNING: This implementation is not necessarily the same + * as the tcp_cubic.c. The purpose is mainly for testing + * the kernel BPF logic. + * + * Highlights: + * 1. CONFIG_HZ .kconfig map is used. + * 2. In bictcp_update(), calculation is changed to use usec + * resolution (i.e. USEC_PER_JIFFY) instead of using jiffies. + * Thus, usecs_to_jiffies() is not used in the bpf_cubic.c. + * 3. In bictcp_update() [under tcp_friendliness], the original + * "while (ca->ack_cnt > delta)" loop is changed to the equivalent + * "ca->ack_cnt / delta" operation. 
+ */ + +#include <linux/bpf.h> +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + +#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation + * max_cwnd = snd_cwnd * beta + */ +#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ + +/* Two methods of hybrid slow start */ +#define HYSTART_ACK_TRAIN 0x1 +#define HYSTART_DELAY 0x2 + +/* Number of delay samples for detecting the increase of delay */ +#define HYSTART_MIN_SAMPLES 8 +#define HYSTART_DELAY_MIN (4000U) /* 4ms */ +#define HYSTART_DELAY_MAX (16000U) /* 16 ms */ +#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) + +static int fast_convergence = 1; +static const int beta = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ +static int initial_ssthresh; +static const int bic_scale = 41; +static int tcp_friendliness = 1; + +static int hystart = 1; +static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY; +static int hystart_low_window = 16; +static int hystart_ack_delta_us = 2000; + +static const __u32 cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ +static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 + / (BICTCP_BETA_SCALE - beta); +/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 + * so K = cubic_root( (wmax-cwnd)*rtt/c ) + * the unit of K is bictcp_HZ=2^10, not HZ + * + * c = bic_scale >> 10 + * rtt = 100ms + * + * the following code has been designed and tested for + * cwnd < 1 million packets + * RTT < 100 seconds + * HZ < 1,000,00 (corresponding to 10 nano-second) + */ + +/* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */ +static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ)) + / (bic_scale * 10); + +/* BIC TCP Parameters */ +struct bictcp { + __u32 cnt; /* increase cwnd by 1 after ACKs */ + __u32 last_max_cwnd; /* last maximum snd_cwnd */ + __u32 last_cwnd; /* the last snd_cwnd */ + __u32 last_time; /* time when updated last_cwnd */ + __u32 bic_origin_point;/* origin point of 
bic function */ + __u32 bic_K; /* time to origin point + from the beginning of the current epoch */ + __u32 delay_min; /* min delay (usec) */ + __u32 epoch_start; /* beginning of an epoch */ + __u32 ack_cnt; /* number of acks */ + __u32 tcp_cwnd; /* estimated tcp cwnd */ + __u16 unused; + __u8 sample_cnt; /* number of samples to decide curr_rtt */ + __u8 found; /* the exit point is found? */ + __u32 round_start; /* beginning of each round */ + __u32 end_seq; /* end_seq of the round */ + __u32 last_ack; /* last time when the ACK spacing is close */ + __u32 curr_rtt; /* the minimum rtt of current round */ +}; + +static inline void bictcp_reset(struct bictcp *ca) +{ + ca->cnt = 0; + ca->last_max_cwnd = 0; + ca->last_cwnd = 0; + ca->last_time = 0; + ca->bic_origin_point = 0; + ca->bic_K = 0; + ca->delay_min = 0; + ca->epoch_start = 0; + ca->ack_cnt = 0; + ca->tcp_cwnd = 0; + ca->found = 0; +} + +extern unsigned long CONFIG_HZ __kconfig; +#define HZ CONFIG_HZ +#define USEC_PER_MSEC 1000UL +#define USEC_PER_SEC 1000000UL +#define USEC_PER_JIFFY (USEC_PER_SEC / HZ) + +static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) +{ + return dividend / divisor; +} + +#define div64_ul div64_u64 + +#define BITS_PER_U64 (sizeof(__u64) * 8) +static __always_inline int fls64(__u64 x) +{ + int num = BITS_PER_U64 - 1; + + if (x == 0) + return 0; + + if (!(x & (~0ull << (BITS_PER_U64-32)))) { + num -= 32; + x <<= 32; + } + if (!(x & (~0ull << (BITS_PER_U64-16)))) { + num -= 16; + x <<= 16; + } + if (!(x & (~0ull << (BITS_PER_U64-8)))) { + num -= 8; + x <<= 8; + } + if (!(x & (~0ull << (BITS_PER_U64-4)))) { + num -= 4; + x <<= 4; + } + if (!(x & (~0ull << (BITS_PER_U64-2)))) { + num -= 2; + x <<= 2; + } + if (!(x & (~0ull << (BITS_PER_U64-1)))) + num -= 1; + + return num + 1; +} + +static __always_inline __u32 bictcp_clock_us(const struct sock *sk) +{ + return tcp_sk(sk)->tcp_mstamp; +} + +static __always_inline void bictcp_hystart_reset(struct sock *sk) +{ + struct 
tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->round_start = ca->last_ack = bictcp_clock_us(sk); + ca->end_seq = tp->snd_nxt; + ca->curr_rtt = ~0U; + ca->sample_cnt = 0; +} + +/* "struct_ops/" prefix is not a requirement + * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS + * as long as it is used in one of the func ptr + * under SEC(".struct_ops"). + */ +SEC("struct_ops/bictcp_init") +void BPF_PROG(bictcp_init, struct sock *sk) +{ + struct bictcp *ca = inet_csk_ca(sk); + + bictcp_reset(ca); + + if (hystart) + bictcp_hystart_reset(sk); + + if (!hystart && initial_ssthresh) + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; +} + +/* No prefix in SEC will also work. + * The remaining tcp-cubic functions have an easier way. + */ +SEC("no-sec-prefix-bictcp_cwnd_event") +void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event) +{ + if (event == CA_EVENT_TX_START) { + struct bictcp *ca = inet_csk_ca(sk); + __u32 now = tcp_jiffies32; + __s32 delta; + + delta = now - tcp_sk(sk)->lsndtime; + + /* We were application limited (idle) for a while. + * Shift epoch_start to keep cwnd growth to cubic curve. + */ + if (ca->epoch_start && delta > 0) { + ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; + } + return; + } +} + +/* + * cbrt(x) MSB values for x MSB values in [0..63]. 
+ * Precomputed then refined by hand - Willy Tarreau + * + * For x in [0..63], + * v = cbrt(x << 18) - 1 + * cbrt(x) = (v[x] + 10) >> 6 + */ +static const __u8 v[] = { + /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, + /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, + /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, + /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, + /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, + /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, + /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, + /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, +}; + +/* calculate the cubic root of x using a table lookup followed by one + * Newton-Raphson iteration. + * Avg err ~= 0.195% + */ +static __always_inline __u32 cubic_root(__u64 a) +{ + __u32 x, b, shift; + + if (a < 64) { + /* a in [0..63] */ + return ((__u32)v[(__u32)a] + 35) >> 6; + } + + b = fls64(a); + b = ((b * 84) >> 8) - 1; + shift = (a >> (b * 3)); + + /* it is needed for verifier's bound check on v */ + if (shift >= 64) + return 0; + + x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6; + + /* + * Newton-Raphson iteration + * 2 + * x = ( 2 * x + a / x ) / 3 + * k+1 k k + */ + x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1))); + x = ((x * 341) >> 10); + return x; +} + +/* + * Compute congestion window to use. + */ +static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd, + __u32 acked) +{ + __u32 delta, bic_target, max_cnt; + __u64 offs, t; + + ca->ack_cnt += acked; /* count the number of ACKed packets */ + + if (ca->last_cwnd == cwnd && + (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) + return; + + /* The CUBIC function can update ca->cnt at most once per jiffy. + * On all cwnd reduction events, ca->epoch_start is set to 0, + * which will force a recalculation of ca->cnt. 
+ */ + if (ca->epoch_start && tcp_jiffies32 == ca->last_time) + goto tcp_friendliness; + + ca->last_cwnd = cwnd; + ca->last_time = tcp_jiffies32; + + if (ca->epoch_start == 0) { + ca->epoch_start = tcp_jiffies32; /* record beginning */ + ca->ack_cnt = acked; /* start counting */ + ca->tcp_cwnd = cwnd; /* syn with cubic */ + + if (ca->last_max_cwnd <= cwnd) { + ca->bic_K = 0; + ca->bic_origin_point = cwnd; + } else { + /* Compute new K based on + * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) + */ + ca->bic_K = cubic_root(cube_factor + * (ca->last_max_cwnd - cwnd)); + ca->bic_origin_point = ca->last_max_cwnd; + } + } + + /* cubic function - calc*/ + /* calculate c * time^3 / rtt, + * while considering overflow in calculation of time^3 + * (so time^3 is done by using 64 bit) + * and without the support of division of 64bit numbers + * (so all divisions are done by using 32 bit) + * also NOTE the unit of those veriables + * time = (t - K) / 2^bictcp_HZ + * c = bic_scale >> 10 + * rtt = (srtt >> 3) / HZ + * !!! The following code does not have overflow problems, + * if the cwnd < 1 million packets !!! + */ + + t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY; + t += ca->delay_min; + /* change the unit from usec to bictcp_HZ */ + t <<= BICTCP_HZ; + t /= USEC_PER_SEC; + + if (t < ca->bic_K) /* t - K */ + offs = ca->bic_K - t; + else + offs = t - ca->bic_K; + + /* c/rtt * (t-K)^3 */ + delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); + if (t < ca->bic_K) /* below origin*/ + bic_target = ca->bic_origin_point - delta; + else /* above origin*/ + bic_target = ca->bic_origin_point + delta; + + /* cubic function - calc bictcp_cnt*/ + if (bic_target > cwnd) { + ca->cnt = cwnd / (bic_target - cwnd); + } else { + ca->cnt = 100 * cwnd; /* very small increment*/ + } + + /* + * The initial growth of cubic function may be too conservative + * when the available bandwidth is still unknown. 
+ */ + if (ca->last_max_cwnd == 0 && ca->cnt > 20) + ca->cnt = 20; /* increase cwnd 5% per RTT */ + +tcp_friendliness: + /* TCP Friendly */ + if (tcp_friendliness) { + __u32 scale = beta_scale; + __u32 n; + + /* update tcp cwnd */ + delta = (cwnd * scale) >> 3; + if (ca->ack_cnt > delta && delta) { + n = ca->ack_cnt / delta; + ca->ack_cnt -= n * delta; + ca->tcp_cwnd += n; + } + + if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ + delta = ca->tcp_cwnd - cwnd; + max_cnt = cwnd / delta; + if (ca->cnt > max_cnt) + ca->cnt = max_cnt; + } + } + + /* The maximum rate of cwnd increase CUBIC allows is 1 packet per + * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. + */ + ca->cnt = max(ca->cnt, 2U); +} + +/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ +void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + if (!tcp_is_cwnd_limited(sk)) + return; + + if (tcp_in_slow_start(tp)) { + if (hystart && after(ack, ca->end_seq)) + bictcp_hystart_reset(sk); + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + bictcp_update(ca, tp->snd_cwnd, acked); + tcp_cong_avoid_ai(tp, ca->cnt, acked); +} + +__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + + ca->epoch_start = 0; /* end of epoch */ + + /* Wmax and fast convergence */ + if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) + ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) + / (2 * BICTCP_BETA_SCALE); + else + ca->last_max_cwnd = tp->snd_cwnd; + + return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); +} + +void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state) +{ + if (new_state == TCP_CA_Loss) { + bictcp_reset(inet_csk_ca(sk)); + bictcp_hystart_reset(sk); + } +} + +#define GSO_MAX_SIZE 65536 + +/* Account for TSO/GRO delays. 
+ * Otherwise short RTT flows could get too small ssthresh, since during + * slow start we begin with small TSO packets and ca->delay_min would + * not account for long aggregation delay when TSO packets get bigger. + * Ideally even with a very small RTT we would like to have at least one + * TSO packet being sent and received by GRO, and another one in qdisc layer. + * We apply another 100% factor because @rate is doubled at this point. + * We cap the cushion to 1ms. + */ +static __always_inline __u32 hystart_ack_delay(struct sock *sk) +{ + unsigned long rate; + + rate = sk->sk_pacing_rate; + if (!rate) + return 0; + return min((__u64)USEC_PER_MSEC, + div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); +} + +static __always_inline void hystart_update(struct sock *sk, __u32 delay) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + __u32 threshold; + + if (hystart_detect & HYSTART_ACK_TRAIN) { + __u32 now = bictcp_clock_us(sk); + + /* first detection parameter - ack-train detection */ + if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) { + ca->last_ack = now; + + threshold = ca->delay_min + hystart_ack_delay(sk); + + /* Hystart ack train triggers if we get ack past + * ca->delay_min/2. + * Pacing might have delayed packets up to RTT/2 + * during slow start. 
+ */ + if (sk->sk_pacing_status == SK_PACING_NONE) + threshold >>= 1; + + if ((__s32)(now - ca->round_start) > threshold) { + ca->found = 1; + tp->snd_ssthresh = tp->snd_cwnd; + } + } + } + + if (hystart_detect & HYSTART_DELAY) { + /* obtain the minimum delay of more than sampling packets */ + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { + if (ca->curr_rtt > delay) + ca->curr_rtt = delay; + + ca->sample_cnt++; + } else { + if (ca->curr_rtt > ca->delay_min + + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { + ca->found = 1; + tp->snd_ssthresh = tp->snd_cwnd; + } + } + } +} + +void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk, + const struct ack_sample *sample) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); + __u32 delay; + + /* Some calls are for duplicates without timetamps */ + if (sample->rtt_us < 0) + return; + + /* Discard delay samples right after fast recovery */ + if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ) + return; + + delay = sample->rtt_us; + if (delay == 0) + delay = 1; + + /* first time call or link delay decreases */ + if (ca->delay_min == 0 || ca->delay_min > delay) + ca->delay_min = delay; + + /* hystart triggers when cwnd is larger than some threshold */ + if (!ca->found && tcp_in_slow_start(tp) && hystart && + tp->snd_cwnd >= hystart_low_window) + hystart_update(sk, delay); +} + +__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + + return max(tp->snd_cwnd, tp->prior_cwnd); +} + +SEC(".struct_ops") +struct tcp_congestion_ops cubic = { + .init = (void *)bictcp_init, + .ssthresh = (void *)bictcp_recalc_ssthresh, + .cong_avoid = (void *)bictcp_cong_avoid, + .set_state = (void *)bictcp_state, + .undo_cwnd = (void *)tcp_reno_undo_cwnd, + .cwnd_event = (void *)bictcp_cwnd_event, + .pkts_acked = (void *)bictcp_acked, + .name = "bpf_cubic", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c 
b/tools/testing/selftests/bpf/progs/bpf_dctcp.c new file mode 100644 index 000000000000..b631fb5032d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +/* WARNING: This implemenation is not necessarily the same + * as the tcp_dctcp.c. The purpose is mainly for testing + * the kernel BPF logic. + */ + +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +#define DCTCP_MAX_ALPHA 1024U + +struct dctcp { + __u32 old_delivered; + __u32 old_delivered_ce; + __u32 prior_rcv_nxt; + __u32 dctcp_alpha; + __u32 next_seq; + __u32 ce_state; + __u32 loss_cwnd; +}; + +static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */ +static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA; + +static __always_inline void dctcp_reset(const struct tcp_sock *tp, + struct dctcp *ca) +{ + ca->next_seq = tp->snd_nxt; + + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; +} + +SEC("struct_ops/dctcp_init") +void BPF_PROG(dctcp_init, struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct dctcp *ca = inet_csk_ca(sk); + + ca->prior_rcv_nxt = tp->rcv_nxt; + ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); + ca->loss_cwnd = 0; + ca->ce_state = 0; + + dctcp_reset(tp, ca); +} + +SEC("struct_ops/dctcp_ssthresh") +__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ca->loss_cwnd = tp->snd_cwnd; + return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); +} + +SEC("struct_ops/dctcp_update_alpha") +void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct dctcp *ca = inet_csk_ca(sk); + + /* Expired RTT */ + if (!before(tp->snd_una, ca->next_seq)) { + __u32 
delivered_ce = tp->delivered_ce - ca->old_delivered_ce; + __u32 alpha = ca->dctcp_alpha; + + /* alpha = (1 - g) * alpha + g * F */ + + alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); + if (delivered_ce) { + __u32 delivered = tp->delivered - ca->old_delivered; + + /* If dctcp_shift_g == 1, a 32bit value would overflow + * after 8 M packets. + */ + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); + + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); + } + ca->dctcp_alpha = alpha; + dctcp_reset(tp, ca); + } +} + +static __always_inline void dctcp_react_to_loss(struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +SEC("struct_ops/dctcp_state") +void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ +} + +static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (ce_state == 1) + tp->ecn_flags |= TCP_ECN_DEMAND_CWR; + else + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; +} + +/* Minimal DCTP CE state machine: + * + * S: 0 <- last pkt was non-CE + * 1 <- last pkt was CE + */ +static __always_inline +void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, + __u32 *prior_rcv_nxt, __u32 *ce_state) +{ + __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0; + + if (*ce_state != new_ce_state) { + /* CE state has changed, force an immediate ACK to + * reflect the new CE state. If an ACK was delayed, + * send that first to reflect the prior CE state. 
+ */ + if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { + dctcp_ece_ack_cwr(sk, *ce_state); + bpf_tcp_send_ack(sk, *prior_rcv_nxt); + } + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } + *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt; + *ce_state = new_ce_state; + dctcp_ece_ack_cwr(sk, new_ce_state); +} + +SEC("struct_ops/dctcp_cwnd_event") +void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) +{ + struct dctcp *ca = inet_csk_ca(sk); + + switch (ev) { + case CA_EVENT_ECN_IS_CE: + case CA_EVENT_ECN_NO_CE: + dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); + break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; + default: + /* Don't care for the rest. */ + break; + } +} + +SEC("struct_ops/dctcp_cwnd_undo") +__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) +{ + const struct dctcp *ca = inet_csk_ca(sk); + + return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); +} + +SEC("struct_ops/tcp_reno_cong_avoid") +void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (!tcp_is_cwnd_limited(sk)) + return; + + /* In "safe" area, increase. */ + if (tcp_in_slow_start(tp)) { + acked = tcp_slow_start(tp, acked); + if (!acked) + return; + } + /* In dangerous area, increase slowly. 
*/ + tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked); +} + +SEC(".struct_ops") +struct tcp_congestion_ops dctcp_nouse = { + .init = (void *)dctcp_init, + .set_state = (void *)dctcp_state, + .flags = TCP_CONG_NEEDS_ECN, + .name = "bpf_dctcp_nouse", +}; + +SEC(".struct_ops") +struct tcp_congestion_ops dctcp = { + .init = (void *)dctcp_init, + .in_ack_event = (void *)dctcp_update_alpha, + .cwnd_event = (void *)dctcp_cwnd_event, + .ssthresh = (void *)dctcp_ssthresh, + .cong_avoid = (void *)tcp_reno_cong_avoid, + .undo_cwnd = (void *)dctcp_cwnd_undo, + .set_state = (void *)dctcp_state, + .flags = TCP_CONG_NEEDS_ECN, + .name = "bpf_dctcp", +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 5ae485a6af3f..9941f0ba471e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -16,8 +16,8 @@ #include <sys/socket.h> #include <linux/if_tunnel.h> #include <linux/mpls.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; #define PROG(F) SEC(#F) int bpf_func_##F @@ -65,8 +65,8 @@ struct { } jmp_table SEC(".maps"); struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1024); __type(key, __u32); __type(value, struct bpf_flow_keys); } last_dissection SEC(".maps"); @@ -74,15 +74,20 @@ struct { static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, int ret) { - struct bpf_flow_keys *val; - __u32 key = 0; + __u32 key = (__u32)(keys->sport) << 16 | keys->dport; + struct bpf_flow_keys val; - val = bpf_map_lookup_elem(&last_dissection, &key); - if (val) - memcpy(val, keys, sizeof(*val)); + memcpy(&val, keys, sizeof(val)); + bpf_map_update_elem(&last_dissection, &key, &val, BPF_ANY); return ret; } +#define IPV6_FLOWLABEL_MASK __bpf_constant_htonl(0x000FFFFF) +static inline __be32 ip6_flowlabel(const 
struct ipv6hdr *hdr) +{ + return *(__be32 *)hdr & IPV6_FLOWLABEL_MASK; +} + static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, __u16 hdr_size, void *buffer) @@ -153,7 +158,6 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) struct tcphdr *tcp, _tcp; struct udphdr *udp, _udp; - keys->ip_proto = proto; switch (proto) { case IPPROTO_ICMP: icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); @@ -162,9 +166,15 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) return export_flow_keys(keys, BPF_OK); case IPPROTO_IPIP: keys->is_encap = true; + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return export_flow_keys(keys, BPF_OK); + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); case IPPROTO_IPV6: keys->is_encap = true; + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return export_flow_keys(keys, BPF_OK); + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); case IPPROTO_GRE: gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); @@ -184,6 +194,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) keys->thoff += 4; /* Step over sequence number */ keys->is_encap = true; + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP) + return export_flow_keys(keys, BPF_OK); if (gre->proto == bpf_htons(ETH_P_TEB)) { eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), @@ -231,7 +243,6 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) { struct bpf_flow_keys *keys = skb->flow_keys; - keys->ip_proto = nexthdr; switch (nexthdr) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: @@ -266,6 +277,7 @@ PROG(IP)(struct __sk_buff *skb) keys->addr_proto = ETH_P_IP; keys->ipv4_src = iph->saddr; keys->ipv4_dst = iph->daddr; + keys->ip_proto = iph->protocol; keys->thoff += iph->ihl << 2; if (data + keys->thoff > data_end) @@ -273,13 +285,20 @@ PROG(IP)(struct __sk_buff *skb) if (iph->frag_off & bpf_htons(IP_MF | 
IP_OFFSET)) { keys->is_frag = true; - if (iph->frag_off & bpf_htons(IP_OFFSET)) + if (iph->frag_off & bpf_htons(IP_OFFSET)) { /* From second fragment on, packets do not have headers * we can parse. */ done = true; - else + } else { keys->is_first_frag = true; + /* No need to parse fragmented packet unless + * explicitly asked for. + */ + if (!(keys->flags & + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) + done = true; + } } if (done) @@ -301,6 +320,11 @@ PROG(IPV6)(struct __sk_buff *skb) memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); keys->thoff += sizeof(struct ipv6hdr); + keys->ip_proto = ip6h->nexthdr; + keys->flow_label = ip6_flowlabel(ip6h); + + if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) + return export_flow_keys(keys, BPF_OK); return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -317,7 +341,8 @@ PROG(IPV6OP)(struct __sk_buff *skb) /* hlen is in 8-octets and does not include the first 8 bytes * of the header */ - skb->flow_keys->thoff += (1 + ip6h->hdrlen) << 3; + keys->thoff += (1 + ip6h->hdrlen) << 3; + keys->ip_proto = ip6h->nexthdr; return parse_ipv6_proto(skb, ip6h->nexthdr); } @@ -333,9 +358,18 @@ PROG(IPV6FR)(struct __sk_buff *skb) keys->thoff += sizeof(*fragh); keys->is_frag = true; - if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) + keys->ip_proto = fragh->nexthdr; + + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) { keys->is_first_frag = true; + /* No need to parse fragmented packet unless + * explicitly asked for. 
+ */ + if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) + return export_flow_keys(keys, BPF_OK); + } + return parse_ipv6_proto(skb, fragh->nexthdr); } diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c new file mode 100644 index 000000000000..018ed7fbba3a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c new file mode 100644 index 000000000000..13d662c57014 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_dim.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___diff_arr_dim x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c new file mode 100644 index 000000000000..a351f418c85d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___diff_arr_val_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___diff_arr_val_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c new file mode 100644 index 000000000000..65eac371b061 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___equiv_zero_sz_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c new file mode 100644 index 
000000000000..ecda2b545ac2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_bad_zero_sz_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c new file mode 100644 index 000000000000..a8735009becc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_non_array.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_non_array x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c new file mode 100644 index 000000000000..2a67c28b1e75 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_shallow.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_too_shallow x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c new file mode 100644 index 000000000000..1142c08c925f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_too_small.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_too_small x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c new file mode 100644 index 000000000000..f5a7c832d0f2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_wrong_val_type x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c new file mode 100644 index 000000000000..fe1d01232c22 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___fixed_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c new file mode 100644 index 000000000000..cff6f1836cc5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_bitfields x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c new file mode 100644 index 000000000000..a1cd157d5451 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_bitfields___bit_sz_change x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c new file mode 100644 index 000000000000..3f2c7b07c456 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_bitfields___bitfield_vs_int x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c new file mode 100644 index 000000000000..f9746d6be399 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c @@ -0,0 +1,3 @@ +#include 
"core_reloc_types.h" + +void f(struct core_reloc_bitfields___err_too_big_bitfield x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c new file mode 100644 index 000000000000..e7c75a6953dd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_bitfields___just_big_enough x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c new file mode 100644 index 000000000000..0b62315ad46c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c new file mode 100644 index 000000000000..dd0ffa518f36 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_arr_kind x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c new file mode 100644 index 000000000000..bc83372088ad --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_arr_value_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c 
new file mode 100644 index 000000000000..917bec41be08 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_int_kind x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c new file mode 100644 index 000000000000..6ec7e6ec1c91 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_int_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c new file mode 100644 index 000000000000..7bbcacf2b0d1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_int_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c new file mode 100644 index 000000000000..f384dd38ec70 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_struct_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c new file mode 100644 index 000000000000..aec2dec20e90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct 
core_reloc_existence___minimal x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c new file mode 100644 index 000000000000..b74455b91227 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_flavors x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c new file mode 100644 index 000000000000..7b6035f86ee6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_flavors__err_wrong_name.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_flavors__err_wrong_name x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c new file mode 100644 index 000000000000..7d0f041042c5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c new file mode 100644 index 000000000000..f9359450186e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___bool.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___bool x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c new file mode 100644 index 000000000000..aafb1c5819d7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ints___reverse_sign.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ints___reverse_sign x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c new file mode 100644 index 000000000000..ed9ad8b5b4f8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_misc.c @@ -0,0 +1,5 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_misc___a x) {} +void f2(struct core_reloc_misc___b x) {} +void f3(struct core_reloc_misc_extensible x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c new file mode 100644 index 000000000000..124197a2e813 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_mods x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c new file mode 100644 index 000000000000..f8a6592ca75f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___mod_swap.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_mods___mod_swap x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c new file mode 100644 index 000000000000..5c0d73687247 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_mods___typedefs.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_mods___typedefs x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c new file mode 100644 index 000000000000..4480fcc0f183 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c new file mode 100644 index 000000000000..13e108f76ece --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___anon_embed.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___anon_embed x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c new file mode 100644 index 000000000000..76b54fda5fbb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___dup_compat_types.c @@ -0,0 +1,5 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_nesting___dup_compat_types x) {} +void f2(struct core_reloc_nesting___dup_compat_types__2 x) {} +void f3(struct core_reloc_nesting___dup_compat_types__3 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c new file mode 100644 index 000000000000..975fb95db810 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_container.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_array_container x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c new file mode 100644 index 000000000000..ad66c67e7980 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_array_field.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_array_field x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c new file mode 100644 index 000000000000..35c5f8da6812 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_dup_incompat_types.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_nesting___err_dup_incompat_types__1 x) {} +void f2(struct core_reloc_nesting___err_dup_incompat_types__2 x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c new file mode 100644 index 000000000000..142e332041db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_container.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_missing_container x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c new file mode 100644 index 000000000000..efcae167fab9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_missing_field.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_missing_field x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c new file mode 100644 index 000000000000..97aaaedd8ada --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_nonstruct_container.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_nonstruct_container x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c new file mode 100644 index 
000000000000..ffde35086e90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_partial_match_dups.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f1(struct core_reloc_nesting___err_partial_match_dups__a x) {} +void f2(struct core_reloc_nesting___err_partial_match_dups__b x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c new file mode 100644 index 000000000000..39a2fadd8e95 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___err_too_deep.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___err_too_deep x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c new file mode 100644 index 000000000000..a09d9dfb20df --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___extra_nesting.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___extra_nesting x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c new file mode 100644 index 000000000000..3d8a1a74012f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_nesting___struct_union_mixup.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_nesting___struct_union_mixup x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c new file mode 100644 index 000000000000..96b90e39242a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c new file mode 100644 index 000000000000..6e87233a3ed0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_enum_def.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___diff_enum_def x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c new file mode 100644 index 000000000000..d9f48e80b9d9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_func_proto.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___diff_func_proto x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c new file mode 100644 index 000000000000..c718f75f8f3b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___diff_ptr_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___diff_ptr_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c new file mode 100644 index 000000000000..b8a120830891 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_enum.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___err_non_enum x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c new file mode 100644 index 000000000000..ad8b3c9aa76f --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_int.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___err_non_int x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c new file mode 100644 index 000000000000..e20bc1d42d0a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_primitives___err_non_ptr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_primitives___err_non_ptr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c new file mode 100644 index 000000000000..8da52432ba17 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ptr_as_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c new file mode 100644 index 000000000000..003acfc9a3e7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_ptr_as_arr___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_ptr_as_arr___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size.c new file mode 100644 index 000000000000..3c80903da5a4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c new file mode 100644 index 000000000000..6dbd14436b52 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c index 3a62119c7498..35c512818a56 100644 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c @@ -62,6 +62,10 @@ struct padded_a_lot { * long: 64; * long: 64; * int b; + * long: 32; + * long: 64; + * long: 64; + * long: 64; *}; * */ @@ -95,7 +99,6 @@ struct zone_padding { struct zone { int a; short b; - short: 16; struct zone_padding __pad__; }; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 1fd244d35ba9..75085119c5bb 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -9,8 +9,8 @@ #include <linux/in6.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC_REWRITE_IP4 0x7f000004U #define DST_REWRITE_IP4 0x7f000001U diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c index 26397ab7b3c7..506d0f81a375 100644 --- a/tools/testing/selftests/bpf/progs/connect6_prog.c +++ b/tools/testing/selftests/bpf/progs/connect6_prog.c @@ -9,8 +9,8 @@ #include <linux/in6.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC_REWRITE_IP6_0 0 #define SRC_REWRITE_IP6_1 0 diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h new file mode 100644 index 000000000000..6d598cfbdb3e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ 
-0,0 +1,806 @@ +#include <stdint.h> +#include <stdbool.h> +/* + * KERNEL + */ + +struct core_reloc_kernel_output { + int valid[10]; + char comm[sizeof("test_progs")]; + int comm_len; +}; + +/* + * FLAVORS + */ +struct core_reloc_flavors { + int a; + int b; + int c; +}; + +/* this is not a flavor, as it doesn't have triple underscore */ +struct core_reloc_flavors__err_wrong_name { + int a; + int b; + int c; +}; + +/* + * NESTING + */ +/* original set up, used to record relocations in BPF program */ +struct core_reloc_nesting_substruct { + int a; +}; + +union core_reloc_nesting_subunion { + int b; +}; + +struct core_reloc_nesting { + union { + struct core_reloc_nesting_substruct a; + } a; + struct { + union core_reloc_nesting_subunion b; + } b; +}; + +/* inlined anonymous struct/union instead of named structs in original */ +struct core_reloc_nesting___anon_embed { + int __just_for_padding; + union { + struct { + int a; + } a; + } a; + struct { + union { + int b; + } b; + } b; +}; + +/* different mix of nested structs/unions than in original */ +struct core_reloc_nesting___struct_union_mixup { + int __a; + struct { + int __a; + union { + char __a; + int a; + } a; + } a; + int __b; + union { + int __b; + union { + char __b; + int b; + } b; + } b; +}; + +/* extra anon structs/unions, but still valid a.a.a and b.b.b accessors */ +struct core_reloc_nesting___extra_nesting { + int __padding; + struct { + struct { + struct { + struct { + union { + int a; + } a; + }; + }; + } a; + int __some_more; + struct { + union { + union { + union { + struct { + int b; + }; + } b; + }; + } b; + }; + }; +}; + +/* three flavors of same struct with different structure but same layout for + * a.a.a and b.b.b, thus successfully resolved and relocatable */ +struct core_reloc_nesting___dup_compat_types { + char __just_for_padding; + /* 3 more bytes of padding */ + struct { + struct { + int a; /* offset 4 */ + } a; + } a; + long long __more_padding; + struct { + struct { + int b; /* offset 16 
*/ + } b; + } b; +}; + +struct core_reloc_nesting___dup_compat_types__2 { + int __aligned_padding; + struct { + int __trickier_noop[0]; + struct { + char __some_more_noops[0]; + int a; /* offset 4 */ + } a; + } a; + int __more_padding; + struct { + struct { + struct { + int __critical_padding; + int b; /* offset 16 */ + } b; + int __does_not_matter; + }; + } b; + int __more_irrelevant_stuff; +}; + +struct core_reloc_nesting___dup_compat_types__3 { + char __correct_padding[4]; + struct { + struct { + int a; /* offset 4 */ + } a; + } a; + /* 8 byte padding due to next struct's alignment */ + struct { + struct { + int b; + } b; + } b __attribute__((aligned(16))); +}; + +/* b.b.b field is missing */ +struct core_reloc_nesting___err_missing_field { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int x; + } b; + } b; +}; + +/* b.b.b field is an array of integers instead of plain int */ +struct core_reloc_nesting___err_array_field { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int b[1]; + } b; + } b; +}; + +/* middle b container is missing */ +struct core_reloc_nesting___err_missing_container { + struct { + struct { + int a; + } a; + } a; + struct { + int x; + } b; +}; + +/* middle b container is referenced through pointer instead of being embedded */ +struct core_reloc_nesting___err_nonstruct_container { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int b; + } *b; + } b; +}; + +/* middle b container is an array of structs instead of plain struct */ +struct core_reloc_nesting___err_array_container { + struct { + struct { + int a; + } a; + } a; + struct { + struct { + int b; + } b[1]; + } b; +}; + +/* two flavors of same struct with incompatible layout for b.b.b */ +struct core_reloc_nesting___err_dup_incompat_types__1 { + struct { + struct { + int a; /* offset 0 */ + } a; + } a; + struct { + struct { + int b; /* offset 4 */ + } b; + } b; +}; + +struct core_reloc_nesting___err_dup_incompat_types__2 { + 
struct { + struct { + int a; /* offset 0 */ + } a; + } a; + int __extra_padding; + struct { + struct { + int b; /* offset 8 (!) */ + } b; + } b; +}; + +/* two flavors of same struct having one of a.a.a and b.b.b, but not both */ +struct core_reloc_nesting___err_partial_match_dups__a { + struct { + struct { + int a; + } a; + } a; +}; + +struct core_reloc_nesting___err_partial_match_dups__b { + struct { + struct { + int b; + } b; + } b; +}; + +struct core_reloc_nesting___err_too_deep { + struct { + struct { + int a; + } a; + } a; + /* 65 levels of nestedness for b.b.b */ + struct { + struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + struct { struct { struct { struct { struct { + /* this one is one too much */ + struct { + int b; + }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + }; }; }; }; }; + } b; + } b; +}; + +/* + * ARRAYS + */ +struct core_reloc_arrays_output { + int a2; + char b123; + int c1c; + int d00d; + int f10c; +}; + +struct core_reloc_arrays_substruct { + int c; + int d; +}; + +struct core_reloc_arrays { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +/* bigger array dimensions */ +struct core_reloc_arrays___diff_arr_dim { + int a[7]; + char b[3][4][5]; + struct core_reloc_arrays_substruct c[4]; + struct 
core_reloc_arrays_substruct d[2][3]; + struct core_reloc_arrays_substruct f[1][3]; +}; + +/* different size of array's value (struct) */ +struct core_reloc_arrays___diff_arr_val_sz { + int a[5]; + char b[2][3][4]; + struct { + int __padding1; + int c; + int __padding2; + } c[3]; + struct { + int __padding1; + int d; + int __padding2; + } d[1][2]; + struct { + int __padding1; + int c; + int __padding2; + } f[][2]; +}; + +struct core_reloc_arrays___equiv_zero_sz_arr { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + /* equivalent to flexible array */ + struct core_reloc_arrays_substruct f[0][2]; +}; + +struct core_reloc_arrays___fixed_arr { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + /* not a flexible array anymore, but within access bounds */ + struct core_reloc_arrays_substruct f[1][2]; +}; + +struct core_reloc_arrays___err_too_small { + int a[2]; /* this one is too small */ + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +struct core_reloc_arrays___err_too_shallow { + int a[5]; + char b[2][3]; /* this one lacks one dimension */ + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +struct core_reloc_arrays___err_non_array { + int a; /* not an array */ + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +struct core_reloc_arrays___err_wrong_val_type { + int a[5]; + char b[2][3][4]; + int c[3]; /* value is not a struct */ + struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +struct core_reloc_arrays___err_bad_zero_sz_arr { + /* zero-sized array, but not at the end */ + struct 
core_reloc_arrays_substruct f[0][2]; + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; +}; + +/* + * PRIMITIVES + */ +enum core_reloc_primitives_enum { + A = 0, + B = 1, +}; + +struct core_reloc_primitives { + char a; + int b; + enum core_reloc_primitives_enum c; + void *d; + int (*f)(const char *); +}; + +struct core_reloc_primitives___diff_enum_def { + char a; + int b; + void *d; + int (*f)(const char *); + enum { + X = 100, + Y = 200, + } c; /* inline enum def with differing set of values */ +}; + +struct core_reloc_primitives___diff_func_proto { + void (*f)(int); /* incompatible function prototype */ + void *d; + enum core_reloc_primitives_enum c; + int b; + char a; +}; + +struct core_reloc_primitives___diff_ptr_type { + const char * const d; /* different pointee type + modifiers */ + char a; + int b; + enum core_reloc_primitives_enum c; + int (*f)(const char *); +}; + +struct core_reloc_primitives___err_non_enum { + char a[1]; + int b; + int c; /* int instead of enum */ + void *d; + int (*f)(const char *); +}; + +struct core_reloc_primitives___err_non_int { + char a[1]; + int *b; /* ptr instead of int */ + enum core_reloc_primitives_enum c; + void *d; + int (*f)(const char *); +}; + +struct core_reloc_primitives___err_non_ptr { + char a[1]; + int b; + enum core_reloc_primitives_enum c; + int d; /* int instead of ptr */ + int (*f)(const char *); +}; + +/* + * MODS + */ +struct core_reloc_mods_output { + int a, b, c, d, e, f, g, h; +}; + +typedef const int int_t; +typedef const char *char_ptr_t; +typedef const int arr_t[7]; + +struct core_reloc_mods_substruct { + int x; + int y; +}; + +typedef struct { + int x; + int y; +} core_reloc_mods_substruct_t; + +struct core_reloc_mods { + int a; + int_t b; + char *c; + char_ptr_t d; + int e[3]; + arr_t f; + struct core_reloc_mods_substruct g; + core_reloc_mods_substruct_t h; +}; + +/* a/b, c/d, e/f, and g/h pairs are swapped */ +struct 
core_reloc_mods___mod_swap { + int b; + int_t a; + char *d; + char_ptr_t c; + int f[3]; + arr_t e; + struct { + int y; + int x; + } h; + core_reloc_mods_substruct_t g; +}; + +typedef int int1_t; +typedef int1_t int2_t; +typedef int2_t int3_t; + +typedef int arr1_t[5]; +typedef arr1_t arr2_t; +typedef arr2_t arr3_t; +typedef arr3_t arr4_t; + +typedef const char * const volatile fancy_char_ptr_t; + +typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt; + +/* we need more typedefs */ +struct core_reloc_mods___typedefs { + core_reloc_mods_substruct_tt g; + core_reloc_mods_substruct_tt h; + arr4_t f; + arr4_t e; + fancy_char_ptr_t d; + fancy_char_ptr_t c; + int3_t b; + int3_t a; +}; + +/* + * PTR_AS_ARR + */ +struct core_reloc_ptr_as_arr { + int a; +}; + +struct core_reloc_ptr_as_arr___diff_sz { + int :32; /* padding */ + char __some_more_padding; + int a; +}; + +/* + * INTS + */ +struct core_reloc_ints { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +/* signed/unsigned types swap */ +struct core_reloc_ints___reverse_sign { + int8_t u8_field; + uint8_t s8_field; + int16_t u16_field; + uint16_t s16_field; + int32_t u32_field; + uint32_t s32_field; + int64_t u64_field; + uint64_t s64_field; +}; + +struct core_reloc_ints___bool { + bool u8_field; /* bool instead of uint8 */ + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +/* + * MISC + */ +struct core_reloc_misc_output { + int a, b, c; +}; + +struct core_reloc_misc___a { + int a1; + int a2; +}; + +struct core_reloc_misc___b { + int b1; + int b2; +}; + +/* this one extends core_reloc_misc_extensible struct from BPF prog */ +struct core_reloc_misc_extensible { + int a; + int b; + int c; + int d; +}; + +/* + * EXISTENCE + */ +struct core_reloc_existence_output { + int a_exists; + int 
a_value; + int b_exists; + int b_value; + int c_exists; + int c_value; + int arr_exists; + int arr_value; + int s_exists; + int s_value; +}; + +struct core_reloc_existence { + int a; + struct { + int b; + }; + int c; + int arr[1]; + struct { + int x; + } s; +}; + +struct core_reloc_existence___minimal { + int a; +}; + +struct core_reloc_existence___err_wrong_int_sz { + short a; +}; + +struct core_reloc_existence___err_wrong_int_type { + int b[1]; +}; + +struct core_reloc_existence___err_wrong_int_kind { + struct{ int x; } c; +}; + +struct core_reloc_existence___err_wrong_arr_kind { + int arr; +}; + +struct core_reloc_existence___err_wrong_arr_value_type { + short arr[1]; +}; + +struct core_reloc_existence___err_wrong_struct_type { + int s; +}; + +/* + * BITFIELDS + */ +/* bitfield read results, all as plain integers */ +struct core_reloc_bitfields_output { + int64_t ub1; + int64_t ub2; + int64_t ub7; + int64_t sb4; + int64_t sb20; + int64_t u32; + int64_t s32; +}; + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +}; + +/* different bit sizes (both up and down) */ +struct core_reloc_bitfields___bit_sz_change { + /* unsigned bitfields */ + uint16_t ub1: 3; /* 1 -> 3 */ + uint32_t ub2: 20; /* 2 -> 20 */ + uint8_t ub7: 1; /* 7 -> 1 */ + /* signed bitfields */ + int8_t sb4: 1; /* 4 -> 1 */ + int32_t sb20: 30; /* 20 -> 30 */ + /* non-bitfields */ + uint16_t u32; /* 32 -> 16 */ + int64_t s32; /* 32 -> 64 */ +}; + +/* turn bitfield into non-bitfield and vice versa */ +struct core_reloc_bitfields___bitfield_vs_int { + uint64_t ub1; /* 3 -> 64 non-bitfield */ + uint8_t ub2; /* 20 -> 8 non-bitfield */ + int64_t ub7; /* 7 -> 64 non-bitfield signed */ + int64_t sb4; /* 4 -> 64 non-bitfield signed */ + uint64_t sb20; /* 20 -> 16 non-bitfield unsigned */ + int32_t u32: 20; /* 32 non-bitfield -> 20 
bitfield */ + uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */ +}; + +struct core_reloc_bitfields___just_big_enough { + uint64_t ub1: 4; + uint64_t ub2: 60; /* packed tightly */ + uint32_t ub7; + uint32_t sb4; + uint32_t sb20; + uint32_t u32; + uint32_t s32; +} __attribute__((packed)) ; + +struct core_reloc_bitfields___err_too_big_bitfield { + uint64_t ub1: 4; + uint64_t ub2: 61; /* packed tightly */ + uint32_t ub7; + uint32_t sb4; + uint32_t sb20; + uint32_t u32; + uint32_t s32; +} __attribute__((packed)) ; + +/* + * SIZE + */ +struct core_reloc_size_output { + int int_sz; + int struct_sz; + int union_sz; + int arr_sz; + int arr_elem_sz; + int ptr_sz; + int enum_sz; +}; + +struct core_reloc_size { + int int_field; + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE = 123 } enum_field; +}; + +struct core_reloc_size___diff_sz { + uint64_t int_field; + struct { int x; int y; int z; } struct_field; + union { int x; char bla[123]; } union_field; + char arr_field[10]; + void *ptr_field; + enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field; +}; diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c index ce41a3475f27..8924e06bdef0 100644 --- a/tools/testing/selftests/bpf/progs/dev_cgroup.c +++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c @@ -7,7 +7,7 @@ #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("cgroup/dev") int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx) diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c new file mode 100644 index 000000000000..38d3a82144ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_test.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" + +char _license[] 
SEC("license") = "GPL"; + +__u64 test1_result = 0; +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(test1, int a) +{ + test1_result = a == 1; + return 0; +} + +__u64 test2_result = 0; +SEC("fentry/bpf_fentry_test2") +int BPF_PROG(test2, int a, __u64 b) +{ + test2_result = a == 2 && b == 3; + return 0; +} + +__u64 test3_result = 0; +SEC("fentry/bpf_fentry_test3") +int BPF_PROG(test3, char a, int b, __u64 c) +{ + test3_result = a == 4 && b == 5 && c == 6; + return 0; +} + +__u64 test4_result = 0; +SEC("fentry/bpf_fentry_test4") +int BPF_PROG(test4, void *a, char b, int c, __u64 d) +{ + test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10; + return 0; +} + +__u64 test5_result = 0; +SEC("fentry/bpf_fentry_test5") +int BPF_PROG(test5, __u64 a, void *b, short c, int d, __u64 e) +{ + test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && + e == 15; + return 0; +} + +__u64 test6_result = 0; +SEC("fentry/bpf_fentry_test6") +int BPF_PROG(test6, __u64 a, void *b, short c, int d, void * e, __u64 f) +{ + test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && + e == (void *)20 && f == 21; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c new file mode 100644 index 000000000000..c329fccf9842 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/stddef.h> +#include <linux/ipv6.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_trace_helpers.h" + +struct sk_buff { + unsigned int len; +}; + +__u64 test_result = 0; +SEC("fexit/test_pkt_access") +int BPF_PROG(test_main, struct sk_buff *skb, int ret) +{ + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ret != 0) + return 0; + test_result = 1; + return 0; +} + +__u64 test_result_subprog1 = 0; 
+SEC("fexit/test_pkt_access_subprog1") +int BPF_PROG(test_subprog1, struct sk_buff *skb, int ret) +{ + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ret != 148) + return 0; + test_result_subprog1 = 1; + return 0; +} + +/* Though test_pkt_access_subprog2() is defined in C as: + * static __attribute__ ((noinline)) + * int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) + * { + * return skb->len * val; + * } + * llvm optimizations remove 'int val' argument and generate BPF assembly: + * r0 = *(u32 *)(r1 + 0) + * w0 <<= 1 + * exit + * In such case the verifier falls back to conservative and + * tracing program can access arguments and return value as u64 + * instead of accurate types. + */ +struct args_subprog2 { + __u64 args[5]; + __u64 ret; +}; +__u64 test_result_subprog2 = 0; +SEC("fexit/test_pkt_access_subprog2") +int test_subprog2(struct args_subprog2 *ctx) +{ + struct sk_buff *skb = (void *)ctx->args[0]; + __u64 ret; + int len; + + bpf_probe_read_kernel(&len, sizeof(len), + __builtin_preserve_access_index(&skb->len)); + + ret = ctx->ret; + /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32 + * which randomizes upper 32 bits after BPF_ALU32 insns. + * Hence after 'w0 <<= 1' upper bits of $rax are random. + * That is expected and correct. Trim them. 
+ */ + ret = (__u32) ret; + if (len != 74 || ret != 148) + return 0; + test_result_subprog2 = 1; + return 0; +} + +__u64 test_result_subprog3 = 0; +SEC("fexit/test_pkt_access_subprog3") +int BPF_PROG(test_subprog3, int val, struct sk_buff *skb, int ret) +{ + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ret != 74 * val || val != 3) + return 0; + test_result_subprog3 = 1; + return 0; +} + +__u64 test_get_skb_len = 0; +SEC("freplace/get_skb_len") +int new_get_skb_len(struct __sk_buff *skb) +{ + int len = skb->len; + + if (len != 74) + return 0; + test_get_skb_len = 1; + return 74; /* original get_skb_len() returns skb->len */ +} + +__u64 test_get_skb_ifindex = 0; +SEC("freplace/get_skb_ifindex") +int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ipv6hdr ip6, *ip6p; + int ifindex = skb->ifindex; + __u32 eth_proto; + __u32 nh_off; + + /* check that BPF extension can read packet via direct packet access */ + if (data + 14 + sizeof(ip6) > data_end) + return 0; + ip6p = data + 14; + + if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123)) + return 0; + + /* check that legacy packet access helper works too */ + if (bpf_skb_load_bytes(skb, 14, &ip6, sizeof(ip6)) < 0) + return 0; + ip6p = &ip6; + if (ip6p->nexthdr != 6 || ip6p->payload_len != __bpf_constant_htons(123)) + return 0; + + if (ifindex != 1 || val != 3 || var != 1) + return 0; + test_get_skb_ifindex = 1; + return 3; /* original get_skb_ifindex() returns val * ifindex * var */ +} + +volatile __u64 test_get_constant = 0; +SEC("freplace/get_constant") +int new_get_constant(long val) +{ + if (val != 123) + return 0; + test_get_constant = 1; + return test_get_constant; /* original get_constant() returns val - 122 */ +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c 
b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c new file mode 100644 index 000000000000..92f3fa47cf40 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" + +struct sk_buff { + unsigned int len; +}; + +__u64 test_result = 0; + +SEC("fexit/test_pkt_md_access") +int BPF_PROG(test_main2, struct sk_buff *skb, int ret) +{ + int len; + + __builtin_preserve_access_index(({ + len = skb->len; + })); + if (len != 74 || ret != 0) + return 0; + + test_result = 1; + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c new file mode 100644 index 000000000000..348109b9ea07 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_test.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" + +char _license[] SEC("license") = "GPL"; + +__u64 test1_result = 0; +SEC("fexit/bpf_fentry_test1") +int BPF_PROG(test1, int a, int ret) +{ + test1_result = a == 1 && ret == 2; + return 0; +} + +__u64 test2_result = 0; +SEC("fexit/bpf_fentry_test2") +int BPF_PROG(test2, int a, __u64 b, int ret) +{ + test2_result = a == 2 && b == 3 && ret == 5; + return 0; +} + +__u64 test3_result = 0; +SEC("fexit/bpf_fentry_test3") +int BPF_PROG(test3, char a, int b, __u64 c, int ret) +{ + test3_result = a == 4 && b == 5 && c == 6 && ret == 15; + return 0; +} + +__u64 test4_result = 0; +SEC("fexit/bpf_fentry_test4") +int BPF_PROG(test4, void *a, char b, int c, __u64 d, int ret) +{ + test4_result = a == (void *)7 && b == 8 && c == 9 && d == 10 && + ret == 34; + return 0; +} + +__u64 test5_result = 0; +SEC("fexit/bpf_fentry_test5") +int BPF_PROG(test5, __u64 a, void *b, short 
c, int d, __u64 e, int ret) +{ + test5_result = a == 11 && b == (void *)12 && c == 13 && d == 14 && + e == 15 && ret == 65; + return 0; +} + +__u64 test6_result = 0; +SEC("fexit/bpf_fentry_test6") +int BPF_PROG(test6, __u64 a, void *b, short c, int d, void *e, __u64 f, int ret) +{ + test6_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && + e == (void *)20 && f == 21 && ret == 111; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c index 16c54ade6888..6b42db2fe391 100644 --- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c @@ -2,7 +2,7 @@ // Copyright (c) 2018 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c new file mode 100644 index 000000000000..8f48a909f079 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_trace_helpers.h" + +char _license[] SEC("license") = "GPL"; +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + +#define _(P) (__builtin_preserve_access_index(P)) + +/* define few struct-s that bpf program needs to access */ +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); +}; +struct dev_ifalias { + struct callback_head rcuhead; +}; + +struct net_device /* same as kernel's struct net_device */ { + int ifindex; + struct dev_ifalias *ifalias; +}; + +typedef struct { + int counter; +} atomic_t; +typedef struct refcount_struct { + atomic_t 
refs; +} refcount_t; + +struct sk_buff { + /* field names and sizes should match to those in the kernel */ + unsigned int len, data_len; + __u16 mac_len, hdr_len, queue_mapping; + struct net_device *dev; + /* order of the fields doesn't matter */ + refcount_t users; + unsigned char *data; + char __pkt_type_offset[0]; + char cb[48]; +}; + +struct meta { + int ifindex; + __u32 cb32_0; + __u8 cb8_0; +}; + +/* TRACE_EVENT(kfree_skb, + * TP_PROTO(struct sk_buff *skb, void *location), + */ +SEC("tp_btf/kfree_skb") +int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location) +{ + struct net_device *dev; + struct callback_head *ptr; + void *func; + int users; + unsigned char *data; + unsigned short pkt_data; + struct meta meta = {}; + char pkt_type; + __u32 *cb32; + __u8 *cb8; + + __builtin_preserve_access_index(({ + users = skb->users.refs.counter; + data = skb->data; + dev = skb->dev; + ptr = dev->ifalias->rcuhead.next; + func = ptr->func; + cb8 = (__u8 *)&skb->cb; + cb32 = (__u32 *)&skb->cb; + })); + + meta.ifindex = _(dev->ifindex); + meta.cb8_0 = cb8[8]; + meta.cb32_0 = cb32[2]; + + bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset)); + pkt_type &= 7; + + /* read eth proto */ + bpf_probe_read_kernel(&pkt_data, sizeof(pkt_data), data + 12); + + bpf_printk("rcuhead.next %llx func %llx\n", ptr, func); + bpf_printk("skb->len %d users %d pkt_type %x\n", + _(skb->len), users, pkt_type); + bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping)); + bpf_printk("dev->ifindex %d data %llx pkt_data %x\n", + meta.ifindex, data, pkt_data); + bpf_printk("cb8_0:%x cb32_0:%x\n", meta.cb8_0, meta.cb32_0); + + if (users != 1 || pkt_data != bpf_htons(0x86dd) || meta.ifindex != 1) + /* raw tp ignores return value */ + return 0; + + /* send first 72 byte of the packet to user space */ + bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU, + &meta, sizeof(meta)); + return 0; +} + +static volatile struct { + bool fentry_test_ok; + bool 
fexit_test_ok; +} result; + +SEC("fentry/eth_type_trans") +int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev, + unsigned short protocol) +{ + int len, ifindex; + + __builtin_preserve_access_index(({ + len = skb->len; + ifindex = dev->ifindex; + })); + + /* fentry sees full packet including L2 header */ + if (len != 74 || ifindex != 1) + return 0; + result.fentry_test_ok = true; + return 0; +} + +SEC("fexit/eth_type_trans") +int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb, struct net_device *dev, + unsigned short protocol) +{ + int len, ifindex; + + __builtin_preserve_access_index(({ + len = skb->len; + ifindex = dev->ifindex; + })); + + /* fexit sees packet without L2 header that eth_type_trans should have + * consumed. + */ + if (len != 60 || protocol != bpf_htons(0x86dd) || ifindex != 1) + return 0; + result.fexit_test_ok = true; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c index 7cdb7f878310..50e66772c046 100644 --- a/tools/testing/selftests/bpf/progs/loop1.c +++ b/tools/testing/selftests/bpf/progs/loop1.c @@ -6,7 +6,8 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c index 9b2f808a2863..947bb7e988c2 100644 --- a/tools/testing/selftests/bpf/progs/loop2.c +++ b/tools/testing/selftests/bpf/progs/loop2.c @@ -6,7 +6,8 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c index d727657d51e2..76e93b31c14b 100644 --- a/tools/testing/selftests/bpf/progs/loop3.c +++ 
b/tools/testing/selftests/bpf/progs/loop3.c @@ -6,7 +6,8 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c new file mode 100644 index 000000000000..b35337926d66 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop4.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("socket") +int combinations(volatile struct __sk_buff* skb) +{ + int ret = 0, i; + +#pragma nounroll + for (i = 0; i < 20; i++) + if (skb->len) + ret |= 1 << i; + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c new file mode 100644 index 000000000000..913791923fa3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/loop5.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#define barrier() __asm__ __volatile__("": : :"memory") + +char _license[] SEC("license") = "GPL"; + +SEC("socket") +int while_true(volatile struct __sk_buff* skb) +{ + int i = 0; + + while (1) { + if (skb->len) + i += 3; + else + i += 7; + if (i == 9) + break; + barrier(); + if (i == 10) + break; + barrier(); + if (i == 13) + break; + barrier(); + if (i == 14) + break; + } + return i; +} diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index 38a997852cad..d071adf178bd 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -2,7 +2,7 @@ #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "netcnt_common.h" #define 
MAX_BPS (3 * 1024 * 1024) diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index 003fe106fc70..cc615b82b56e 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -6,7 +6,7 @@ #include <stddef.h> #include <stdbool.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define FUNCTION_NAME_LEN 64 #define FILE_NAME_LEN 128 @@ -72,9 +72,9 @@ static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) void* thread_state; int key; - bpf_probe_read(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); - bpf_probe_read(&thread_state, sizeof(thread_state), - tls_base + 0x310 + key * 0x10 + 0x08); + bpf_probe_read_user(&key, sizeof(key), (void*)(long)pidData->tls_key_addr); + bpf_probe_read_user(&thread_state, sizeof(thread_state), + tls_base + 0x310 + key * 0x10 + 0x08); return thread_state; } @@ -82,31 +82,33 @@ static __always_inline bool get_frame_data(void *frame_ptr, PidData *pidData, FrameData *frame, Symbol *symbol) { // read data from PyFrameObject - bpf_probe_read(&frame->f_back, - sizeof(frame->f_back), - frame_ptr + pidData->offsets.PyFrameObject_back); - bpf_probe_read(&frame->f_code, - sizeof(frame->f_code), - frame_ptr + pidData->offsets.PyFrameObject_code); + bpf_probe_read_user(&frame->f_back, + sizeof(frame->f_back), + frame_ptr + pidData->offsets.PyFrameObject_back); + bpf_probe_read_user(&frame->f_code, + sizeof(frame->f_code), + frame_ptr + pidData->offsets.PyFrameObject_code); // read data from PyCodeObject if (!frame->f_code) return false; - bpf_probe_read(&frame->co_filename, - sizeof(frame->co_filename), - frame->f_code + pidData->offsets.PyCodeObject_filename); - bpf_probe_read(&frame->co_name, - sizeof(frame->co_name), - frame->f_code + pidData->offsets.PyCodeObject_name); + bpf_probe_read_user(&frame->co_filename, + sizeof(frame->co_filename), + frame->f_code + 
pidData->offsets.PyCodeObject_filename); + bpf_probe_read_user(&frame->co_name, + sizeof(frame->co_name), + frame->f_code + pidData->offsets.PyCodeObject_name); // read actual names into symbol if (frame->co_filename) - bpf_probe_read_str(&symbol->file, - sizeof(symbol->file), - frame->co_filename + pidData->offsets.String_data); + bpf_probe_read_user_str(&symbol->file, + sizeof(symbol->file), + frame->co_filename + + pidData->offsets.String_data); if (frame->co_name) - bpf_probe_read_str(&symbol->name, - sizeof(symbol->name), - frame->co_name + pidData->offsets.String_data); + bpf_probe_read_user_str(&symbol->name, + sizeof(symbol->name), + frame->co_name + + pidData->offsets.String_data); return true; } @@ -152,7 +154,12 @@ struct { __uint(value_size, sizeof(long long) * 127); } stackmap SEC(".maps"); -static __always_inline int __on_event(struct pt_regs *ctx) +#ifdef GLOBAL_FUNC +__attribute__((noinline)) +#else +static __always_inline +#endif +int __on_event(struct bpf_raw_tracepoint_args *ctx) { uint64_t pid_tgid = bpf_get_current_pid_tgid(); pid_t pid = (pid_t)(pid_tgid >> 32); @@ -174,9 +181,9 @@ static __always_inline int __on_event(struct pt_regs *ctx) event->kernel_stack_id = bpf_get_stackid(ctx, &stackmap, 0); void* thread_state_current = (void*)0; - bpf_probe_read(&thread_state_current, - sizeof(thread_state_current), - (void*)(long)pidData->current_state_addr); + bpf_probe_read_user(&thread_state_current, + sizeof(thread_state_current), + (void*)(long)pidData->current_state_addr); struct task_struct* task = (struct task_struct*)bpf_get_current_task(); void* tls_base = (void*)task; @@ -188,11 +195,13 @@ static __always_inline int __on_event(struct pt_regs *ctx) if (pidData->use_tls) { uint64_t pthread_created; uint64_t pthread_self; - bpf_probe_read(&pthread_self, sizeof(pthread_self), tls_base + 0x10); + bpf_probe_read_user(&pthread_self, sizeof(pthread_self), + tls_base + 0x10); - bpf_probe_read(&pthread_created, - sizeof(pthread_created), - 
thread_state + pidData->offsets.PyThreadState_thread); + bpf_probe_read_user(&pthread_created, + sizeof(pthread_created), + thread_state + + pidData->offsets.PyThreadState_thread); event->pthread_match = pthread_created == pthread_self; } else { event->pthread_match = 1; @@ -204,9 +213,10 @@ static __always_inline int __on_event(struct pt_regs *ctx) Symbol sym = {}; int cur_cpu = bpf_get_smp_processor_id(); - bpf_probe_read(&frame_ptr, - sizeof(frame_ptr), - thread_state + pidData->offsets.PyThreadState_frame); + bpf_probe_read_user(&frame_ptr, + sizeof(frame_ptr), + thread_state + + pidData->offsets.PyThreadState_frame); int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym); if (symbol_counter == NULL) @@ -249,7 +259,7 @@ static __always_inline int __on_event(struct pt_regs *ctx) } SEC("raw_tracepoint/kfree_skb") -int on_event(struct pt_regs* ctx) +int on_event(struct bpf_raw_tracepoint_args* ctx) { int i, ret = 0; ret |= __on_event(ctx); diff --git a/tools/testing/selftests/bpf/progs/pyperf_global.c b/tools/testing/selftests/bpf/progs/pyperf_global.c new file mode 100644 index 000000000000..079e78a7562b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf_global.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define STACK_MAX_LEN 50 +#define GLOBAL_FUNC +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c index 0756303676ac..1612a32007b6 100644 --- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c +++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct bpf_map_def SEC("maps") htab = { .type = BPF_MAP_TYPE_HASH, diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c index 
a91536b1c47e..092d9da536f3 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c @@ -5,8 +5,8 @@ #include <linux/bpf.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC1_IP4 0xAC100001U /* 172.16.0.1 */ #define SRC2_IP4 0x00000000U diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c index a68062820410..255a432bc163 100644 --- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c +++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c @@ -5,8 +5,8 @@ #include <linux/bpf.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define SRC_REWRITE_IP6_0 0 #define SRC_REWRITE_IP6_1 0 diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c index e4440fdd94cb..0cb5656a22b0 100644 --- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c @@ -4,8 +4,8 @@ #include <linux/bpf.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct socket_cookie { __u64 cookie_key; diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c index 9390e0244259..a5c6d5903b22 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c @@ -1,6 +1,6 @@ #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c index 
e80484d98a1a..fdb4bf4408fa 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c @@ -1,7 +1,7 @@ #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c index 433e23918a62..4797dc985064 100644 --- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c +++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c @@ -1,6 +1,6 @@ #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c new file mode 100644 index 000000000000..c6d428a8d785 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; +__u32 _version SEC("version") = 1; + +#define SOL_CUSTOM 0xdeadbeef +#define CUSTOM_INHERIT1 0 +#define CUSTOM_INHERIT2 1 +#define CUSTOM_LISTENER 2 + +struct sockopt_inherit { + __u8 val; +}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct sockopt_inherit); +} cloned1_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct sockopt_inherit); +} cloned2_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sockopt_inherit); +} listener_only_map SEC(".maps"); + 
+static __inline struct sockopt_inherit *get_storage(struct bpf_sockopt *ctx) +{ + if (ctx->optname == CUSTOM_INHERIT1) + return bpf_sk_storage_get(&cloned1_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + else if (ctx->optname == CUSTOM_INHERIT2) + return bpf_sk_storage_get(&cloned2_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + else + return bpf_sk_storage_get(&listener_only_map, ctx->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); +} + +SEC("cgroup/getsockopt") +int _getsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + struct sockopt_inherit *storage; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_CUSTOM) + return 1; /* only interested in SOL_CUSTOM */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + storage = get_storage(ctx); + if (!storage) + return 0; /* EPERM, couldn't get sk storage */ + + ctx->retval = 0; /* Reset system call return value to zero */ + + optval[0] = storage->val; + ctx->optlen = 1; + + return 1; +} + +SEC("cgroup/setsockopt") +int _setsockopt(struct bpf_sockopt *ctx) +{ + __u8 *optval_end = ctx->optval_end; + struct sockopt_inherit *storage; + __u8 *optval = ctx->optval; + + if (ctx->level != SOL_CUSTOM) + return 1; /* only interested in SOL_CUSTOM */ + + if (optval + 1 > optval_end) + return 0; /* EPERM, bounds check */ + + storage = get_storage(ctx); + if (!storage) + return 0; /* EPERM, couldn't get sk storage */ + + storage->val = optval[0]; + ctx->optlen = -1; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 4afd2595c08e..9d8c212dde9f 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <netinet/in.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; diff --git 
a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index 076122c898e9..d5a5eeb5fb52 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include <string.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; @@ -12,13 +14,12 @@ struct sockopt_sk { __u8 val; }; -struct bpf_map_def SEC("maps") socket_storage_map = { - .type = BPF_MAP_TYPE_SK_STORAGE, - .key_size = sizeof(int), - .value_size = sizeof(struct sockopt_sk), - .map_flags = BPF_F_NO_PREALLOC, -}; -BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct sockopt_sk); +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct sockopt_sk); +} socket_storage_map SEC(".maps"); SEC("cgroup/getsockopt") int _getsockopt(struct bpf_sockopt *ctx) @@ -42,6 +43,14 @@ int _getsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { + /* Not interested in SOL_TCP:TCP_CONGESTION; + * let next BPF program in the cgroup chain or kernel + * handle it. 
+ */ + return 1; + } + if (ctx->level != SOL_CUSTOM) return 0; /* EPERM, deny everything except custom level */ @@ -91,6 +100,18 @@ int _setsockopt(struct bpf_sockopt *ctx) return 1; } + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { + /* Always use cubic */ + + if (optval + 5 > optval_end) + return 0; /* EPERM, bounds check */ + + memcpy(optval, "cubic", 5); + ctx->optlen = 5; + + return 1; + } + if (ctx->level != SOL_CUSTOM) return 0; /* EPERM, deny everything except custom level */ diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 8a399bdfd920..ad61b722a9de 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -8,7 +8,7 @@ #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/types.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> typedef uint32_t pid_t; struct task_struct {}; @@ -98,7 +98,7 @@ struct strobe_map_raw { /* * having volatile doesn't change anything on BPF side, but clang * emits warnings for passing `volatile const char *` into - * bpf_probe_read_str that expects just `const char *` + * bpf_probe_read_user_str that expects just `const char *` */ const char* tag; /* @@ -309,18 +309,18 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc, dtv_t *dtv; void *tls_ptr; - bpf_probe_read(&tls_index, sizeof(struct tls_index), - (void *)loc->offset); + bpf_probe_read_user(&tls_index, sizeof(struct tls_index), + (void *)loc->offset); /* valid module index is always positive */ if (tls_index.module > 0) { /* dtv = ((struct tcbhead *)tls_base)->dtv[tls_index.module] */ - bpf_probe_read(&dtv, sizeof(dtv), - &((struct tcbhead *)tls_base)->dtv); + bpf_probe_read_user(&dtv, sizeof(dtv), + &((struct tcbhead *)tls_base)->dtv); dtv += tls_index.module; } else { dtv = NULL; } - bpf_probe_read(&tls_ptr, sizeof(void *), dtv); + bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv); /* if 
pointer has (void *)-1 value, then TLS wasn't initialized yet */ return tls_ptr && tls_ptr != (void *)-1 ? tls_ptr + tls_index.offset @@ -336,7 +336,7 @@ static __always_inline void read_int_var(struct strobemeta_cfg *cfg, if (!location) return; - bpf_probe_read(value, sizeof(struct strobe_value_generic), location); + bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); data->int_vals[idx] = value->val; if (value->header.len) data->int_vals_set_mask |= (1 << idx); @@ -356,13 +356,13 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, if (!location) return 0; - bpf_probe_read(value, sizeof(struct strobe_value_generic), location); - len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, value->ptr); + bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); + len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr); /* - * if bpf_probe_read_str returns error (<0), due to casting to + * if bpf_probe_read_user_str returns error (<0), due to casting to * unsinged int, it will become big number, so next check is * sufficient to check for errors AND prove to BPF verifier, that - * bpf_probe_read_str won't return anything bigger than + * bpf_probe_read_user_str won't return anything bigger than * STROBE_MAX_STR_LEN */ if (len > STROBE_MAX_STR_LEN) @@ -391,8 +391,8 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, if (!location) return payload; - bpf_probe_read(value, sizeof(struct strobe_value_generic), location); - if (bpf_probe_read(&map, sizeof(struct strobe_map_raw), value->ptr)) + bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location); + if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr)) return payload; descr->id = map.id; @@ -402,7 +402,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, data->req_meta_valid = 1; } - len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, map.tag); + len = 
bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag); if (len <= STROBE_MAX_STR_LEN) { descr->tag_len = len; payload += len; @@ -413,17 +413,20 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, #else #pragma unroll #endif - for (int i = 0; i < STROBE_MAX_MAP_ENTRIES && i < map.cnt; ++i) { + for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) { + if (i >= map.cnt) + break; + descr->key_lens[i] = 0; - len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, - map.entries[i].key); + len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + map.entries[i].key); if (len <= STROBE_MAX_STR_LEN) { descr->key_lens[i] = len; payload += len; } descr->val_lens[i] = 0; - len = bpf_probe_read_str(payload, STROBE_MAX_STR_LEN, - map.entries[i].val); + len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, + map.entries[i].val); if (len <= STROBE_MAX_STR_LEN) { descr->val_lens[i] = len; payload += len; diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c new file mode 100644 index 000000000000..1f407e65ae52 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +#define TAIL_FUNC(x) \ + SEC("classifier/" #x) \ + int bpf_func_##x(struct __sk_buff *skb) \ + { \ + return x; \ + } +TAIL_FUNC(0) +TAIL_FUNC(1) +TAIL_FUNC(2) + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + /* Multiple locations to make sure we patch + * all of them. 
+ */ + bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call(skb, &jmp_table, 0); + + bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call(skb, &jmp_table, 1); + + bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call(skb, &jmp_table, 2); + + return 3; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c new file mode 100644 index 000000000000..a093e739cf0e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 5); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 1); + return 0; +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 2); + return 1; +} + +SEC("classifier/2") +int bpf_func_2(struct __sk_buff *skb) +{ + return 2; +} + +SEC("classifier/3") +int bpf_func_3(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 4); + return 3; +} + +SEC("classifier/4") +int bpf_func_4(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 3); + return 4; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 0); + /* Check multi-prog update. */ + bpf_tail_call(skb, &jmp_table, 2); + /* Check tail call limit. 
*/ + bpf_tail_call(skb, &jmp_table, 3); + return 3; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c new file mode 100644 index 000000000000..cabda877cf0a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static volatile int count; + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + count++; + bpf_tail_call(skb, &jmp_table, 0); + return 1; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, 0); + return 0; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c new file mode 100644 index 000000000000..f82075b47d7d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall4.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static volatile int selector; + +#define TAIL_FUNC(x) \ + SEC("classifier/" #x) \ + int bpf_func_##x(struct __sk_buff *skb) \ + { \ + return x; \ + } +TAIL_FUNC(0) +TAIL_FUNC(1) +TAIL_FUNC(2) + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call(skb, &jmp_table, selector); + return 3; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c 
b/tools/testing/selftests/bpf/progs/tailcall5.c new file mode 100644 index 000000000000..ce5450744fd4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall5.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static volatile int selector; + +#define TAIL_FUNC(x) \ + SEC("classifier/" #x) \ + int bpf_func_##x(struct __sk_buff *skb) \ + { \ + return x; \ + } +TAIL_FUNC(0) +TAIL_FUNC(1) +TAIL_FUNC(2) + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + int idx = 0; + + if (selector == 1234) + idx = 1; + else if (selector == 5678) + idx = 2; + + bpf_tail_call(skb, &jmp_table, idx); + return 3; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c index 233bdcb1659e..0cb3204ddb18 100644 --- a/tools/testing/selftests/bpf/progs/tcp_rtt.c +++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; __u32 _version SEC("version") = 1; @@ -13,13 +13,12 @@ struct tcp_rtt_storage { __u32 icsk_retransmits; }; -struct bpf_map_def SEC("maps") socket_storage_map = { - .type = BPF_MAP_TYPE_SK_STORAGE, - .key_size = sizeof(int), - .value_size = sizeof(struct tcp_rtt_storage), - .map_flags = BPF_F_NO_PREALLOC, -}; -BPF_ANNOTATE_KV_PAIR(socket_storage_map, int, struct tcp_rtt_storage); +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tcp_rtt_storage); +} socket_storage_map SEC(".maps"); SEC("sockops") int _sockops(struct bpf_sock_ops *ctx) diff --git 
a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c index 4cd5e860c903..b7fc85769bdc 100644 --- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c +++ b/tools/testing/selftests/bpf/progs/test_adjust_tail.c @@ -7,7 +7,7 @@ */ #include <linux/bpf.h> #include <linux/if_ether.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 63a8dfef893b..dd8fae6660ab 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -3,50 +3,39 @@ #include <linux/ptrace.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4); - __type(key, int); - __type(value, int); -} results_map SEC(".maps"); +int kprobe_res = 0; +int kretprobe_res = 0; +int uprobe_res = 0; +int uretprobe_res = 0; SEC("kprobe/sys_nanosleep") -int handle_sys_nanosleep_entry(struct pt_regs *ctx) +int handle_kprobe(struct pt_regs *ctx) { - const int key = 0, value = 1; - - bpf_map_update_elem(&results_map, &key, &value, 0); + kprobe_res = 1; return 0; } SEC("kretprobe/sys_nanosleep") -int handle_sys_getpid_return(struct pt_regs *ctx) +int handle_kretprobe(struct pt_regs *ctx) { - const int key = 1, value = 2; - - bpf_map_update_elem(&results_map, &key, &value, 0); + kretprobe_res = 2; return 0; } SEC("uprobe/trigger_func") -int handle_uprobe_entry(struct pt_regs *ctx) +int handle_uprobe(struct pt_regs *ctx) { - const int key = 2, value = 3; - - bpf_map_update_elem(&results_map, &key, &value, 0); + uprobe_res = 3; return 0; } SEC("uretprobe/trigger_func") -int handle_uprobe_return(struct pt_regs *ctx) +int handle_uretprobe(struct pt_regs *ctx) { - const int key = 3, value = 4; - - bpf_map_update_elem(&results_map, &key, &value, 0); + 
uretprobe_res = 4; return 0; } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c index e5c79fe0ffdb..88b0566da13d 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include "bpf_legacy.h" int _version SEC("version") = 1; @@ -25,7 +26,7 @@ struct dummy_tracepoint_args { }; __attribute__((noinline)) -static int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -43,7 +44,7 @@ static int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -static int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(struct dummy_tracepoint_args *arg) { return test_long_fname_2(arg); } diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 5ee3622ddebb..a924e53c8e9d 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include "bpf_legacy.h" int _version SEC("version") = 1; @@ -33,7 +34,7 @@ struct dummy_tracepoint_args { }; __attribute__((noinline)) -static int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -56,7 +57,7 @@ static int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -static int 
test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(struct dummy_tracepoint_args *arg) { return test_long_fname_2(arg); } diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c index 434188c37774..983aedd1c072 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2018 Facebook */ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; @@ -23,7 +23,7 @@ struct dummy_tracepoint_args { }; __attribute__((noinline)) -static int test_long_fname_2(struct dummy_tracepoint_args *arg) +int test_long_fname_2(struct dummy_tracepoint_args *arg) { struct ipv_counts *counts; int key = 0; @@ -41,7 +41,7 @@ static int test_long_fname_2(struct dummy_tracepoint_args *arg) } __attribute__((noinline)) -static int test_long_fname_1(struct dummy_tracepoint_args *arg) +int test_long_fname_1(struct dummy_tracepoint_args *arg) { return test_long_fname_2(arg); } diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c new file mode 100644 index 000000000000..3ac3603ad53d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdint.h> +#include <stdbool.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +/* non-existing BPF helper, to test dead code elimination */ +static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; + +extern int LINUX_KERNEL_VERSION __kconfig; +extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ +extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; +extern bool CONFIG_BOOL __kconfig __weak; +extern char CONFIG_CHAR __kconfig __weak; +extern uint16_t 
CONFIG_USHORT __kconfig __weak; +extern int CONFIG_INT __kconfig __weak; +extern uint64_t CONFIG_ULONG __kconfig __weak; +extern const char CONFIG_STR[8] __kconfig __weak; +extern uint64_t CONFIG_MISSING __kconfig __weak; + +uint64_t kern_ver = -1; +uint64_t bpf_syscall = -1; +uint64_t tristate_val = -1; +uint64_t bool_val = -1; +uint64_t char_val = -1; +uint64_t ushort_val = -1; +uint64_t int_val = -1; +uint64_t ulong_val = -1; +char str_val[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; +uint64_t missing_val = -1; + +SEC("raw_tp/sys_enter") +int handle_sys_enter(struct pt_regs *ctx) +{ + int i; + + kern_ver = LINUX_KERNEL_VERSION; + bpf_syscall = CONFIG_BPF_SYSCALL; + tristate_val = CONFIG_TRISTATE; + bool_val = CONFIG_BOOL; + char_val = CONFIG_CHAR; + ushort_val = CONFIG_USHORT; + int_val = CONFIG_INT; + ulong_val = CONFIG_ULONG; + + for (i = 0; i < sizeof(CONFIG_STR); i++) { + str_val[i] = CONFIG_STR[i]; + } + + if (CONFIG_MISSING) + /* invalid, but dead code - never executed */ + missing_val = bpf_missing_helper(ctx, 123); + else + missing_val = 0xDEADC0DE; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c new file mode 100644 index 000000000000..51b3f79df523 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_arrays_output { + int a2; + char b123; + int c1c; + int d00d; + int f01c; +}; + +struct core_reloc_arrays_substruct { + int c; + int d; +}; + +struct core_reloc_arrays { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct 
d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_arrays(void *ctx) +{ + struct core_reloc_arrays *in = (void *)&data.in; + struct core_reloc_arrays_output *out = (void *)&data.out; + + if (CORE_READ(&out->a2, &in->a[2])) + return 1; + if (CORE_READ(&out->b123, &in->b[1][2][3])) + return 1; + if (CORE_READ(&out->c1c, &in->c[1].c)) + return 1; + if (CORE_READ(&out->d00d, &in->d[0][0].d)) + return 1; + if (CORE_READ(&out->f01c, &in->f[0][1].c)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c new file mode 100644 index 000000000000..56aec20212b5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +}; + +/* bitfield read results, all as plain integers */ +struct core_reloc_bitfields_output { + int64_t ub1; + int64_t ub2; + int64_t ub7; + int64_t sb4; + int64_t sb20; + int64_t u32; + int64_t s32; +}; + +struct pt_regs; + +struct trace_sys_enter { + struct pt_regs *regs; + long id; +}; + +SEC("tp_btf/sys_enter") +int test_core_bitfields_direct(void *ctx) +{ + struct core_reloc_bitfields *in = (void *)&data.in; + struct core_reloc_bitfields_output *out = (void *)&data.out; + + out->ub1 = BPF_CORE_READ_BITFIELD(in, ub1); + out->ub2 = BPF_CORE_READ_BITFIELD(in, 
ub2); + out->ub7 = BPF_CORE_READ_BITFIELD(in, ub7); + out->sb4 = BPF_CORE_READ_BITFIELD(in, sb4); + out->sb20 = BPF_CORE_READ_BITFIELD(in, sb20); + out->u32 = BPF_CORE_READ_BITFIELD(in, u32); + out->s32 = BPF_CORE_READ_BITFIELD(in, s32); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c new file mode 100644 index 000000000000..ab1e647aeb31 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +}; + +/* bitfield read results, all as plain integers */ +struct core_reloc_bitfields_output { + int64_t ub1; + int64_t ub2; + int64_t ub7; + int64_t sb4; + int64_t sb20; + int64_t u32; + int64_t s32; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_bitfields(void *ctx) +{ + struct core_reloc_bitfields *in = (void *)&data.in; + struct core_reloc_bitfields_output *out = (void *)&data.out; + uint64_t res; + + out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1); + out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2); + out->ub7 = BPF_CORE_READ_BITFIELD_PROBED(in, ub7); + out->sb4 = BPF_CORE_READ_BITFIELD_PROBED(in, sb4); + out->sb20 = BPF_CORE_READ_BITFIELD_PROBED(in, sb20); + out->u32 = BPF_CORE_READ_BITFIELD_PROBED(in, u32); + out->s32 = BPF_CORE_READ_BITFIELD_PROBED(in, s32); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c 
b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c new file mode 100644 index 000000000000..7e45e2bdf6cd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_existence_output { + int a_exists; + int a_value; + int b_exists; + int b_value; + int c_exists; + int c_value; + int arr_exists; + int arr_value; + int s_exists; + int s_value; +}; + +struct core_reloc_existence { + struct { + int x; + } s; + int arr[1]; + int a; + struct { + int b; + }; + int c; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_existence(void *ctx) +{ + struct core_reloc_existence *in = (void *)&data.in; + struct core_reloc_existence_output *out = (void *)&data.out; + + out->a_exists = bpf_core_field_exists(in->a); + if (bpf_core_field_exists(in->a)) + out->a_value = BPF_CORE_READ(in, a); + else + out->a_value = 0xff000001u; + + out->b_exists = bpf_core_field_exists(in->b); + if (bpf_core_field_exists(in->b)) + out->b_value = BPF_CORE_READ(in, b); + else + out->b_value = 0xff000002u; + + out->c_exists = bpf_core_field_exists(in->c); + if (bpf_core_field_exists(in->c)) + out->c_value = BPF_CORE_READ(in, c); + else + out->c_value = 0xff000003u; + + out->arr_exists = bpf_core_field_exists(in->arr); + if (bpf_core_field_exists(in->arr)) + out->arr_value = BPF_CORE_READ(in, arr[0]); + else + out->arr_value = 0xff000004u; + + out->s_exists = bpf_core_field_exists(in->s); + if (bpf_core_field_exists(in->s)) + out->s_value = BPF_CORE_READ(in, s.x); + else + out->s_value = 0xff000005u; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c new 
file mode 100644 index 000000000000..525acc2f841b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_flavors.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_flavors { + int a; + int b; + int c; +}; + +/* local flavor with reversed layout */ +struct core_reloc_flavors___reversed { + int c; + int b; + int a; +}; + +/* local flavor with nested/overlapping layout */ +struct core_reloc_flavors___weird { + struct { + int b; + }; + /* a and c overlap in local flavor, but this should still work + * correctly with target original flavor + */ + union { + int a; + int c; + }; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_flavors(void *ctx) +{ + struct core_reloc_flavors *in_orig = (void *)&data.in; + struct core_reloc_flavors___reversed *in_rev = (void *)&data.in; + struct core_reloc_flavors___weird *in_weird = (void *)&data.in; + struct core_reloc_flavors *out = (void *)&data.out; + + /* read a using weird layout */ + if (CORE_READ(&out->a, &in_weird->a)) + return 1; + /* read b using reversed layout */ + if (CORE_READ(&out->b, &in_rev->b)) + return 1; + /* read c using original layout */ + if (CORE_READ(&out->c, &in_orig->c)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c new file mode 100644 index 000000000000..6b5290739806 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ints.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char 
_license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_ints { + uint8_t u8_field; + int8_t s8_field; + uint16_t u16_field; + int16_t s16_field; + uint32_t u32_field; + int32_t s32_field; + uint64_t u64_field; + int64_t s64_field; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_ints(void *ctx) +{ + struct core_reloc_ints *in = (void *)&data.in; + struct core_reloc_ints *out = (void *)&data.out; + + if (CORE_READ(&out->u8_field, &in->u8_field) || + CORE_READ(&out->s8_field, &in->s8_field) || + CORE_READ(&out->u16_field, &in->u16_field) || + CORE_READ(&out->s16_field, &in->s16_field) || + CORE_READ(&out->u32_field, &in->u32_field) || + CORE_READ(&out->s32_field, &in->s32_field) || + CORE_READ(&out->u64_field, &in->u64_field) || + CORE_READ(&out->s64_field, &in->s64_field)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c new file mode 100644 index 000000000000..aba928fd60d3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + uint64_t my_pid_tgid; +} data = {}; + +struct core_reloc_kernel_output { + int valid[10]; + /* we have test_progs[-flavor], so cut flavor part */ + char comm[sizeof("test_progs")]; + int comm_len; +}; + +struct task_struct { + int pid; + int tgid; + char comm[16]; + struct task_struct *group_leader; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_kernel(void *ctx) +{ + struct task_struct *task = (void 
*)bpf_get_current_task(); + struct core_reloc_kernel_output *out = (void *)&data.out; + uint64_t pid_tgid = bpf_get_current_pid_tgid(); + uint32_t real_tgid = (uint32_t)pid_tgid; + int pid, tgid; + + if (data.my_pid_tgid != pid_tgid) + return 0; + + if (CORE_READ(&pid, &task->pid) || + CORE_READ(&tgid, &task->tgid)) + return 1; + + /* validate pid + tgid matches */ + out->valid[0] = (((uint64_t)pid << 32) | tgid) == pid_tgid; + + /* test variadic BPF_CORE_READ macros */ + out->valid[1] = BPF_CORE_READ(task, + tgid) == real_tgid; + out->valid[2] = BPF_CORE_READ(task, + group_leader, + tgid) == real_tgid; + out->valid[3] = BPF_CORE_READ(task, + group_leader, group_leader, + tgid) == real_tgid; + out->valid[4] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + tgid) == real_tgid; + out->valid[5] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, + tgid) == real_tgid; + out->valid[6] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, + tgid) == real_tgid; + out->valid[7] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, + tgid) == real_tgid; + out->valid[8] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, + group_leader, + tgid) == real_tgid; + out->valid[9] = BPF_CORE_READ(task, + group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, + group_leader, group_leader, + tgid) == real_tgid; + + /* test BPF_CORE_READ_STR_INTO() returns correct code and contents */ + out->comm_len = BPF_CORE_READ_STR_INTO( + &out->comm, task, + group_leader, group_leader, group_leader, group_leader, + group_leader, group_leader, group_leader, group_leader, + comm); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c new file mode 100644 index 
000000000000..d5756dbdef82 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_misc.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_misc_output { + int a, b, c; +}; + +struct core_reloc_misc___a { + int a1; + int a2; +}; + +struct core_reloc_misc___b { + int b1; + int b2; +}; + +/* fixed two first members, can be extended with new fields */ +struct core_reloc_misc_extensible { + int a; + int b; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_misc(void *ctx) +{ + struct core_reloc_misc___a *in_a = (void *)&data.in; + struct core_reloc_misc___b *in_b = (void *)&data.in; + struct core_reloc_misc_extensible *in_ext = (void *)&data.in; + struct core_reloc_misc_output *out = (void *)&data.out; + + /* record two different relocations with the same accessor string */ + if (CORE_READ(&out->a, &in_a->a1) || /* accessor: 0:0 */ + CORE_READ(&out->b, &in_b->b1)) /* accessor: 0:0 */ + return 1; + + /* Validate relocations capture array-only accesses for structs with + * fixed header, but with potentially extendable tail. This will read + * first 4 bytes of 2nd element of in_ext array of potentially + * variably sized struct core_reloc_misc_extensible. 
*/ + if (CORE_READ(&out->c, &in_ext[2])) /* accessor: 2 */ + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c new file mode 100644 index 000000000000..8b533db4a7a5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_mods_output { + int a, b, c, d, e, f, g, h; +}; + +typedef const int int_t; +typedef const char *char_ptr_t; +typedef const int arr_t[7]; + +struct core_reloc_mods_substruct { + int x; + int y; +}; + +typedef struct { + int x; + int y; +} core_reloc_mods_substruct_t; + +struct core_reloc_mods { + int a; + int_t b; + char *c; + char_ptr_t d; + int e[3]; + arr_t f; + struct core_reloc_mods_substruct g; + core_reloc_mods_substruct_t h; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_mods(void *ctx) +{ + struct core_reloc_mods *in = (void *)&data.in; + struct core_reloc_mods_output *out = (void *)&data.out; + + if (CORE_READ(&out->a, &in->a) || + CORE_READ(&out->b, &in->b) || + CORE_READ(&out->c, &in->c) || + CORE_READ(&out->d, &in->d) || + CORE_READ(&out->e, &in->e[2]) || + CORE_READ(&out->f, &in->f[1]) || + CORE_READ(&out->g, &in->g.x) || + CORE_READ(&out->h, &in->h.y)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c new file mode 100644 index 000000000000..2b4b6d49c677 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_nesting.c @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 
2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_nesting_substruct { + int a; +}; + +union core_reloc_nesting_subunion { + int b; +}; + +/* int a.a.a and b.b.b accesses */ +struct core_reloc_nesting { + union { + struct core_reloc_nesting_substruct a; + } a; + struct { + union core_reloc_nesting_subunion b; + } b; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_nesting(void *ctx) +{ + struct core_reloc_nesting *in = (void *)&data.in; + struct core_reloc_nesting *out = (void *)&data.out; + + if (CORE_READ(&out->a.a.a, &in->a.a.a)) + return 1; + if (CORE_READ(&out->b.b.b, &in->b.b.b)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c new file mode 100644 index 000000000000..2a8975678aa6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_primitives.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +enum core_reloc_primitives_enum { + A = 0, + B = 1, +}; + +struct core_reloc_primitives { + char a; + int b; + enum core_reloc_primitives_enum c; + void *d; + int (*f)(const char *); +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_primitives(void *ctx) +{ + struct core_reloc_primitives *in = (void *)&data.in; + struct core_reloc_primitives *out = (void *)&data.out; + + if (CORE_READ(&out->a, &in->a) || + CORE_READ(&out->b, &in->b) || + 
CORE_READ(&out->c, &in->c) || + CORE_READ(&out->d, &in->d) || + CORE_READ(&out->f, &in->f)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c new file mode 100644 index 000000000000..ca61a5183b88 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_ptr_as_arr.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_ptr_as_arr { + int a; +}; + +#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) + +SEC("raw_tracepoint/sys_enter") +int test_core_ptr_as_arr(void *ctx) +{ + struct core_reloc_ptr_as_arr *in = (void *)&data.in; + struct core_reloc_ptr_as_arr *out = (void *)&data.out; + + if (CORE_READ(&out->a, &in[2].a)) + return 1; + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c new file mode 100644 index 000000000000..d7fb6cfc7891 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; +} data = {}; + +struct core_reloc_size_output { + int int_sz; + int struct_sz; + int union_sz; + int arr_sz; + int arr_elem_sz; + int ptr_sz; + int enum_sz; +}; + +struct core_reloc_size { + int int_field; + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE = 123 } enum_field; +}; + 
+SEC("raw_tracepoint/sys_enter") +int test_core_size(void *ctx) +{ + struct core_reloc_size *in = (void *)&data.in; + struct core_reloc_size_output *out = (void *)&data.out; + + out->int_sz = bpf_core_field_size(in->int_field); + out->struct_sz = bpf_core_field_size(in->struct_field); + out->union_sz = bpf_core_field_size(in->union_field); + out->arr_sz = bpf_core_field_size(in->arr_field); + out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]); + out->ptr_sz = bpf_core_field_size(in->ptr_field); + out->enum_sz = bpf_core_field_size(in->enum_field); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index 33254b771384..29817a703984 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* Permit pretty deep stack traces */ #define MAX_STACK_RAWTP 100 @@ -47,15 +47,14 @@ struct { * issue and avoid complicated C programming massaging. * This is an acceptable workaround since there is one entry here. 
*/ -typedef __u64 raw_stack_trace_t[2 * MAX_STACK_RAWTP]; struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(max_entries, 1); __type(key, __u32); - __type(value, raw_stack_trace_t); + __type(value, __u64[2 * MAX_STACK_RAWTP]); } rawdata_map SEC(".maps"); -SEC("tracepoint/raw_syscalls/sys_enter") +SEC("raw_tracepoint/sys_enter") int bpf_prog1(void *ctx) { int max_len, max_buildid_len, usize, ksize, total_size; @@ -100,4 +99,3 @@ int bpf_prog1(void *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c index 32a6073acb99..dd7a4d3dbc0d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_data.c +++ b/tools/testing/selftests/bpf/progs/test_global_data.c @@ -5,7 +5,7 @@ #include <linux/pkt_cls.h> #include <string.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c new file mode 100644 index 000000000000..880260f6d536 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#ifndef MAX_STACK +#define MAX_STACK (512 - 3 * 32 + 8) +#endif + +static __attribute__ ((noinline)) +int f0(int var, struct __sk_buff *skb) +{ + return skb->len; +} + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + volatile char buf[MAX_STACK] = {}; + + return f0(0, skb) + skb->len; +} + +int f3(int, struct __sk_buff *skb, int); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, skb, 1); +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb, int var) +{ + 
volatile char buf[MAX_STACK] = {}; + + return skb->ifindex * val * var; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c new file mode 100644 index 000000000000..2c18d82923a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func2.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#define MAX_STACK (512 - 3 * 32) +#include "test_global_func1.c" diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c new file mode 100644 index 000000000000..86f0ecb304fc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func3.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + val; +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb, int var) +{ + return f2(var, skb) + val; +} + +__attribute__ ((noinline)) +int f4(struct __sk_buff *skb) +{ + return f3(1, skb, 2); +} + +__attribute__ ((noinline)) +int f5(struct __sk_buff *skb) +{ + return f4(skb); +} + +__attribute__ ((noinline)) +int f6(struct __sk_buff *skb) +{ + return f5(skb); +} + +__attribute__ ((noinline)) +int f7(struct __sk_buff *skb) +{ + return f6(skb); +} + +#ifndef NO_FN8 +__attribute__ ((noinline)) +int f8(struct __sk_buff *skb) +{ + return f7(skb); +} +#endif + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ +#ifndef NO_FN8 + return f8(skb); +#else + return f7(skb); +#endif +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c 
b/tools/testing/selftests/bpf/progs/test_global_func4.c new file mode 100644 index 000000000000..610f75edf276 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func4.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#define NO_FN8 +#include "test_global_func3.c" diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c new file mode 100644 index 000000000000..260c25b827ef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func5.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +int f3(int, struct __sk_buff *skb); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, (void *)&val); /* type mismatch */ +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb) +{ + return skb->ifindex * val; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + return f1(skb) + f2(2, skb) + f3(3, skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c new file mode 100644 index 000000000000..69e19c64e10b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func6.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +int f1(struct __sk_buff *skb) +{ + return skb->len; +} + +int f3(int, struct __sk_buff *skb); + +__attribute__ ((noinline)) +int f2(int val, struct __sk_buff *skb) +{ + return f1(skb) + f3(val, skb + 1); /* type mismatch */ +} + +__attribute__ ((noinline)) +int f3(int val, struct __sk_buff *skb) +{ + return skb->ifindex 
* val; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + return f1(skb) + f2(2, skb) + f3(3, skb); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c new file mode 100644 index 000000000000..309b3f6136bd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_func7.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Facebook */ +#include <stddef.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__attribute__ ((noinline)) +void foo(struct __sk_buff *skb) +{ + skb->tc_index = 0; +} + +SEC("classifier/test") +int test_cls(struct __sk_buff *skb) +{ + foo(skb); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c index 1d652ee8e73d..33493911d87a 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb.c @@ -17,9 +17,9 @@ #include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "test_iptunnel_common.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 2e4efe70b1e5..28351936a438 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -13,9 +13,9 @@ #include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #include "test_iptunnel_common.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c index 4147130cc3b7..7a6620671a83 100644 --- 
a/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c +++ b/tools/testing/selftests/bpf/progs/test_lirc_mode2_kern.c @@ -5,7 +5,7 @@ #include <linux/bpf.h> #include <linux/lirc.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("lirc_mode2") int bpf_decoder(unsigned int *sample) diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c index c957d6dfe6d7..d6cb986e7533 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c @@ -4,8 +4,8 @@ #include <linux/bpf.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct grehdr { __be16 flags; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c index a334a0e882e4..48ff2b2ad5e7 100644 --- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c +++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c @@ -3,8 +3,8 @@ #include <errno.h> #include <linux/seg6_local.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ @@ -12,10 +12,6 @@ #define SR6_FLAG_ALERT (1 << 4) -#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ - 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) -#define ntohll(x) ((bpf_ntohl(1)) == 1 ? 
(x) : ((uint64_t)bpf_ntohl((x) & \ - 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) #define BPF_PACKET_HEADER __attribute__((packed)) struct ip6_t { @@ -276,8 +272,8 @@ int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh) return 0; // check if egress TLV value is correct - if (ntohll(egr_addr.hi) == 0xfd00000000000000 && - ntohll(egr_addr.lo) == 0x4) + if (bpf_be64_to_cpu(egr_addr.hi) == 0xfd00000000000000 && + bpf_be64_to_cpu(egr_addr.lo) == 0x4) return 1; } @@ -308,8 +304,8 @@ int __encap_srh(struct __sk_buff *skb) #pragma clang loop unroll(full) for (unsigned long long lo = 0; lo < 4; lo++) { - seg->lo = htonll(4 - lo); - seg->hi = htonll(hi); + seg->lo = bpf_cpu_to_be64(4 - lo); + seg->hi = bpf_cpu_to_be64(hi); seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg)); } @@ -349,8 +345,8 @@ int __add_egr_x(struct __sk_buff *skb) if (err) return BPF_DROP; - addr.lo = htonll(lo); - addr.hi = htonll(hi); + addr.lo = bpf_cpu_to_be64(lo); + addr.hi = bpf_cpu_to_be64(hi); err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, (void *)&addr, sizeof(addr)); if (err) diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index 113226115365..1cfeb940cf9f 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -3,7 +3,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <linux/types.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c index bb7ce35f691b..b5c07ae7b68f 100644 --- a/tools/testing/selftests/bpf/progs/test_map_lock.c +++ b/tools/testing/selftests/bpf/progs/test_map_lock.c @@ -2,7 +2,7 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define VAR_NUM 16 diff --git 
a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c new file mode 100644 index 000000000000..6239596cd14e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_mmap.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 512 * 4); /* at least 4 pages of data */ + __uint(map_flags, BPF_F_MMAPABLE); + __type(key, __u32); + __type(value, __u64); +} data_map SEC(".maps"); + +__u64 in_val = 0; +__u64 out_val = 0; + +SEC("raw_tracepoint/sys_enter") +int test_mmap(void *ctx) +{ + int zero = 0, one = 1, two = 2, far = 1500; + __u64 val, *p; + + out_val = in_val; + + /* data_map[2] = in_val; */ + bpf_map_update_elem(&data_map, &two, (const void *)&in_val, 0); + + /* data_map[1] = data_map[0] * 2; */ + p = bpf_map_lookup_elem(&data_map, &zero); + if (p) { + val = (*p) * 2; + bpf_map_update_elem(&data_map, &one, &val, 0); + } + + /* data_map[far] = in_val * 3; */ + val = in_val * 3; + bpf_map_update_elem(&data_map, &far, &val, 0); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c index 3d30c02bdae9..98b9de2fafd0 100644 --- a/tools/testing/selftests/bpf/progs/test_obj_id.c +++ b/tools/testing/selftests/bpf/progs/test_obj_id.c @@ -4,7 +4,7 @@ #include <stddef.h> #include <linux/bpf.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* It is a dumb bpf program such that it must have no * issue to be loaded since testing the verifier is diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c new file mode 100644 index 000000000000..bfe9fbcb9684 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -0,0 +1,45 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ +#include <stdbool.h> +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/ptrace.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_trace_helpers.h" + +struct task_struct; + +SEC("kprobe/__set_task_comm") +int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec) +{ + return !tsk; +} + +SEC("kretprobe/__set_task_comm") +int BPF_KRETPROBE(prog2, + struct task_struct *tsk, const char *buf, bool exec, + int ret) +{ + return !PT_REGS_PARM1(ctx) && ret; +} + +SEC("raw_tp/task_rename") +int prog3(struct bpf_raw_tracepoint_args *ctx) +{ + return !ctx->args[0]; +} + +SEC("fentry/__set_task_comm") +int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec) +{ + return !tsk; +} + +SEC("fexit/__set_task_comm") +int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) +{ + return !tsk; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c index 876c27deb65a..ebfcc9f50c35 100644 --- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c +++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c @@ -3,7 +3,8 @@ #include <linux/ptrace.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); @@ -12,7 +13,7 @@ struct { } perf_buf_map SEC(".maps"); SEC("kprobe/sys_nanosleep") -int handle_sys_nanosleep_entry(struct pt_regs *ctx) +int BPF_KPROBE(handle_sys_nanosleep_entry) { int cpu = bpf_get_smp_processor_id(); @@ -22,4 +23,3 @@ int handle_sys_nanosleep_entry(struct pt_regs *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c new file mode 100644 index 
000000000000..4ef2630292b2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int _version SEC("version") = 1; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} nopinmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + __uint(pinning, LIBBPF_PIN_NONE); +} nopinmap2 SEC(".maps"); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c new file mode 100644 index 000000000000..5412e0c732c7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +int _version SEC("version") = 1; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); + __uint(pinning, 2); /* invalid */ +} nopinmap3 SEC(".maps"); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index 7cf42d14103f..e72eba4a93d2 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -11,14 +11,76 @@ #include <linux/in.h> #include <linux/tcp.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define barrier() __asm__ __volatile__("": : :"memory") int _version SEC("version") = 1; 
-SEC("test1") -int process(struct __sk_buff *skb) +/* llvm will optimize both subprograms into exactly the same BPF assembly + * + * Disassembly of section .text: + * + * 0000000000000000 test_pkt_access_subprog1: + * ; return skb->len * 2; + * 0: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0) + * 1: 64 00 00 00 01 00 00 00 w0 <<= 1 + * 2: 95 00 00 00 00 00 00 00 exit + * + * 0000000000000018 test_pkt_access_subprog2: + * ; return skb->len * val; + * 3: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0) + * 4: 64 00 00 00 01 00 00 00 w0 <<= 1 + * 5: 95 00 00 00 00 00 00 00 exit + * + * Which makes it an interesting test for BTF-enabled verifier. + */ +static __attribute__ ((noinline)) +int test_pkt_access_subprog1(volatile struct __sk_buff *skb) +{ + return skb->len * 2; +} + +static __attribute__ ((noinline)) +int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb) +{ + return skb->len * val; +} + +#define MAX_STACK (512 - 2 * 32) + +__attribute__ ((noinline)) +int get_skb_len(struct __sk_buff *skb) +{ + volatile char buf[MAX_STACK] = {}; + + return skb->len; +} + +__attribute__ ((noinline)) +int get_constant(long val) +{ + return val - 122; +} + +int get_skb_ifindex(int, struct __sk_buff *skb, int); + +__attribute__ ((noinline)) +int test_pkt_access_subprog3(int val, struct __sk_buff *skb) +{ + return get_skb_len(skb) * get_skb_ifindex(val, skb, get_constant(123)); +} + +__attribute__ ((noinline)) +int get_skb_ifindex(int val, struct __sk_buff *skb, int var) +{ + volatile char buf[MAX_STACK] = {}; + + return skb->ifindex * val * var; +} + +SEC("classifier/test_pkt_access") +int test_pkt_access(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; void *data = (void *)(long)skb->data; @@ -48,6 +110,12 @@ int process(struct __sk_buff *skb) tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len); } + if (test_pkt_access_subprog1(skb) != skb->len * 2) + return TC_ACT_SHOT; + if (test_pkt_access_subprog2(2, skb) != skb->len * 2) + return 
TC_ACT_SHOT; + if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) + return TC_ACT_SHOT; if (tcp) { if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c index 3d039e18bf82..610c74ea9f64 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c @@ -5,7 +5,7 @@ #include <string.h> #include <linux/bpf.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; @@ -27,8 +27,8 @@ int _version SEC("version") = 1; } #endif -SEC("test1") -int process(struct __sk_buff *skb) +SEC("classifier/test_pkt_md_access") +int test_pkt_md_access(struct __sk_buff *skb) { TEST_FIELD(__u8, len, 0xFF); TEST_FIELD(__u16, len, 0xFFFF); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c new file mode 100644 index 000000000000..d556b1572cc6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ptrace.h> +#include <linux/bpf.h> + +#include <netinet/in.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_trace_helpers.h" + +static struct sockaddr_in old; + +SEC("kprobe/__sys_connect") +int BPF_KPROBE(handle_sys_connect) +{ + void *ptr = (void *)PT_REGS_PARM2(ctx); + struct sockaddr_in new; + + bpf_probe_read_user(&old, sizeof(old), ptr); + __builtin_memset(&new, 0xab, sizeof(new)); + bpf_probe_write_user(ptr, &new, sizeof(new)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h index 0e014d3b2b36..4dd9806ad73b 100644 --- a/tools/testing/selftests/bpf/test_queue_stack_map.h +++ 
b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h @@ -6,7 +6,7 @@ #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c new file mode 100644 index 000000000000..ecbeea2df259 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +static volatile const struct { + unsigned a[4]; + /* + * if the struct's size is multiple of 16, compiler will put it into + * .rodata.cst16 section, which is not recognized by libbpf; work + * around this by ensuring we don't have 16-aligned struct + */ + char _y; +} rdonly_values = { .a = {2, 3, 4, 5} }; + +static volatile struct { + unsigned did_run; + unsigned iters; + unsigned sum; +} res; + +SEC("raw_tracepoint/sys_enter:skip_loop") +int skip_loop(struct pt_regs *ctx) +{ + /* prevent compiler to optimize everything out */ + unsigned * volatile p = (void *)&rdonly_values.a; + unsigned iters = 0, sum = 0; + + /* we should never enter this loop */ + while (*p & 1) { + iters++; + sum += *p; + p++; + } + res.did_run = 1; + res.iters = iters; + res.sum = sum; + return 0; +} + +SEC("raw_tracepoint/sys_enter:part_loop") +int part_loop(struct pt_regs *ctx) +{ + /* prevent compiler to optimize everything out */ + unsigned * volatile p = (void *)&rdonly_values.a; + unsigned iters = 0, sum = 0; + + /* validate verifier can derive loop termination */ + while (*p < 5) { + iters++; + sum += *p; + p++; + } + res.did_run = 1; + res.iters = iters; + res.sum = sum; + return 0; +} + +SEC("raw_tracepoint/sys_enter:full_loop") +int full_loop(struct pt_regs *ctx) +{ + /* prevent compiler to optimize everything out */ + unsigned * 
volatile p = (void *)&rdonly_values.a; + int i = sizeof(rdonly_values.a) / sizeof(rdonly_values.a[0]); + unsigned iters = 0, sum = 0; + + /* validate verifier can allow full loop as well */ + while (i > 0 ) { + iters++; + sum += *p; + p++; + i--; + } + res.did_run = 1; + res.iters = iters; + res.sum = sum; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c index 1dbe1d4d467e..a7278f064368 100644 --- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c +++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c @@ -3,8 +3,8 @@ #include <errno.h> #include <linux/seg6_local.h> #include <linux/bpf.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* Packet parsing state machine helpers. */ #define cursor_advance(_cursor, _len) \ @@ -12,10 +12,6 @@ #define SR6_FLAG_ALERT (1 << 4) -#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \ - 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32)) -#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \ - 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32)) #define BPF_PACKET_HEADER __attribute__((packed)) struct ip6_t { @@ -136,8 +132,10 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb, *pad_off = 0; // we can only go as far as ~10 TLVs due to the BPF max stack size + // workaround: define induction variable "i" as "long" instead + // of "int" to prevent alu32 sub-register spilling. 
#pragma clang loop unroll(disable) - for (int i = 0; i < 100; i++) { + for (long i = 0; i < 100; i++) { struct sr6_tlv_t tlv; if (cur_off == *tlv_off) @@ -251,8 +249,8 @@ int __add_egr_x(struct __sk_buff *skb) if (err) return BPF_DROP; - addr.lo = htonll(lo); - addr.hi = htonll(hi); + addr.lo = bpf_cpu_to_be64(lo); + addr.hi = bpf_cpu_to_be64(hi); err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, (void *)&addr, sizeof(addr)); if (err) diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index ea7d84f01235..26e77dcc7e91 100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -11,8 +11,8 @@ #include <linux/types.h> #include <linux/if_ether.h> -#include "bpf_endian.h" -#include "bpf_helpers.h" +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> #include "test_select_reuseport_common.h" int _version SEC("version") = 1; @@ -62,7 +62,7 @@ struct { goto done; \ }) -SEC("select_by_skb_data") +SEC("sk_reuseport") int _select_by_skb_data(struct sk_reuseport_md *reuse_md) { __u32 linum, index = 0, flags = 0, index_zero = 0; @@ -113,6 +113,12 @@ int _select_by_skb_data(struct sk_reuseport_md *reuse_md) data_check.skb_ports[0] = th->source; data_check.skb_ports[1] = th->dest; + if (th->fin) + /* The connection is being torn down at the end of a + * test. It can't contain a cmd, so return early. 
+ */ + return SK_PASS; + if ((th->doff << 2) + sizeof(*cmd) > data_check.len) GOTO_DONE(DROP_ERR_SKB_DATA); if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy, diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 0e6be01157e6..1acc91e87bfc 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -2,46 +2,39 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __type(value, __u64); -} info_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 1); - __type(key, __u32); - __type(value, __u64); -} status_map SEC(".maps"); - -SEC("send_signal_demo") -int bpf_send_signal_test(void *ctx) +#include <bpf/bpf_helpers.h> + +__u32 sig = 0, pid = 0, status = 0, signal_thread = 0; + +static __always_inline int bpf_send_signal_test(void *ctx) { - __u64 *info_val, *status_val; - __u32 key = 0, pid, sig; int ret; - status_val = bpf_map_lookup_elem(&status_map, &key); - if (!status_val || *status_val != 0) - return 0; - - info_val = bpf_map_lookup_elem(&info_map, &key); - if (!info_val || *info_val == 0) + if (status != 0 || sig == 0 || pid == 0) return 0; - sig = *info_val >> 32; - pid = *info_val & 0xffffFFFF; - if ((bpf_get_current_pid_tgid() >> 32) == pid) { - ret = bpf_send_signal(sig); + if (signal_thread) + ret = bpf_send_signal_thread(sig); + else + ret = bpf_send_signal(sig); if (ret == 0) - *status_val = 1; + status = 1; } return 0; } + +SEC("tracepoint/syscalls/sys_enter_nanosleep") +int send_signal_tp(void *ctx) +{ + return bpf_send_signal_test(ctx); +} + +SEC("perf_event") +int send_signal_perf(void *ctx) +{ + return bpf_send_signal_test(ctx); +} + char __license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index e21cd736c196..d2b38fa6a5b0 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -12,8 +12,8 @@ #include <linux/pkt_cls.h> #include <linux/tcp.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; @@ -53,7 +53,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, return result; } -SEC("sk_lookup_success") +SEC("classifier/sk_lookup_success") int bpf_sk_lookup_test0(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; @@ -78,7 +78,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return sk ? TC_ACT_OK : TC_ACT_UNSPEC; } -SEC("sk_lookup_success_simple") +SEC("classifier/sk_lookup_success_simple") int bpf_sk_lookup_test1(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -90,7 +90,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) return 0; } -SEC("fail_use_after_free") +SEC("classifier/fail_use_after_free") int bpf_sk_lookup_uaf(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -105,7 +105,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) return family; } -SEC("fail_modify_sk_pointer") +SEC("classifier/fail_modify_sk_pointer") int bpf_sk_lookup_modptr(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -120,7 +120,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) return 0; } -SEC("fail_modify_sk_or_null_pointer") +SEC("classifier/fail_modify_sk_or_null_pointer") int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -134,7 +134,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) return 0; } -SEC("fail_no_release") +SEC("classifier/fail_no_release") int bpf_sk_lookup_test2(struct __sk_buff *skb) 
{ struct bpf_sock_tuple tuple = {}; @@ -143,7 +143,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) return 0; } -SEC("fail_release_twice") +SEC("classifier/fail_release_twice") int bpf_sk_lookup_test3(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -155,7 +155,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) return 0; } -SEC("fail_release_unchecked") +SEC("classifier/fail_release_unchecked") int bpf_sk_lookup_test4(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("fail_no_release_subcall") +SEC("classifier/fail_no_release_subcall") int bpf_sk_lookup_test5(struct __sk_buff *skb) { lookup_no_release(skb); diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c index 68cf9829f5a7..552f2090665c 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c +++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c @@ -6,7 +6,7 @@ #include <string.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define NUM_CGROUP_LEVELS 4 diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 7a80960d7df1..202de3938494 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; char _license[] SEC("license") = "GPL"; @@ -16,6 +16,13 @@ int process(struct __sk_buff *skb) skb->cb[i]++; } skb->priority++; + skb->tstamp++; + skb->mark++; + + if (skb->wire_len != 100) + return 1; + if (skb->gso_segs != 8) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c 
b/tools/testing/selftests/bpf/progs/test_skeleton.c new file mode 100644 index 000000000000..de03a90f78ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct s { + int a; + long long b; +} __attribute__((packed)); + +int in1 = 0; +long long in2 = 0; +char in3 = '\0'; +long long in4 __attribute__((aligned(64))) = 0; +struct s in5 = {}; + +long long out2 = 0; +char out3 = 0; +long long out4 = 0; +int out1 = 0; + +extern bool CONFIG_BPF_SYSCALL __kconfig; +extern int LINUX_KERNEL_VERSION __kconfig; +bool bpf_syscall = 0; +int kern_ver = 0; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + static volatile struct s out5; + + out1 = in1; + out2 = in2; + out3 = in3; + out4 = in4; + out5 = in5; + + bpf_syscall = CONFIG_BPF_SYSCALL; + kern_ver = LINUX_KERNEL_VERSION; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index a47b003623ef..9bcaa37f476a 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -5,8 +5,8 @@ #include <netinet/in.h> #include <stdbool.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> enum bpf_addr_array_idx { ADDR_SRV_IDX, diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c index a43b999c8da2..0d31a3b3505f 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c @@ -2,7 +2,7 @@ // Copyright (c) 2019 Facebook #include <linux/bpf.h> #include <linux/version.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct hmap_elem { volatile 
int cnt; diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index f5638e26865d..0cf0134631b4 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -2,7 +2,7 @@ // Copyright (c) 2018 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef PERF_MAX_STACK_DEPTH #define PERF_MAX_STACK_DEPTH 127 diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c index fa0be3e10a10..00ed48672620 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c @@ -2,7 +2,7 @@ // Copyright (c) 2018 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef PERF_MAX_STACK_DEPTH #define PERF_MAX_STACK_DEPTH 127 @@ -74,4 +74,3 @@ int oncpu(struct sched_switch_args *ctx) } char _license[] SEC("license") = "GPL"; -__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */ diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 608a06871572..458b0d69133e 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) @@ -44,7 +44,10 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx) unsigned long tcp_mem[TCP_MEM_LOOPS] = {}; char value[MAX_VALUE_STR_LEN]; unsigned char i, off = 0; - int ret; + /* a workaround to prevent compiler from generating + * codes verifier cannot handle yet. 
+ */ + volatile int ret; if (ctx->write) return 0; diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index cb201cbe11e7..b2e6f9b0894d 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 5cbbff416998..2d0b0b82a78a 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -7,7 +7,7 @@ #include <linux/stddef.h> #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* Max supported length of a string with unsigned long in base 10 (pow2 - 1). 
*/ #define MAX_ULONG_STR_LEN 0xF diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c index 3af64c470d64..bf28814bfde5 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_edt.c +++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c @@ -2,12 +2,13 @@ #include <stdint.h> #include <linux/bpf.h> #include <linux/if_ether.h> +#include <linux/stddef.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/pkt_cls.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* the maximum delay we are willing to add (drop packets beyond that) */ #define TIME_HORIZON_NS (2000 * 1000 * 1000) diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 74370e7e286d..37bce7a7c394 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -17,8 +17,8 @@ #include <linux/pkt_cls.h> #include <linux/types.h> -#include "bpf_endian.h" -#include "bpf_helpers.h" +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> static const int cfg_port = 8000; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c index 1ab095bcacd8..47cbe2eeae43 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c @@ -13,16 +13,35 @@ #include <sys/socket.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> struct bpf_map_def SEC("maps") results = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), - .value_size = sizeof(__u64), - .max_entries = 1, + .value_size = sizeof(__u32), + .max_entries = 3, }; +static __always_inline __s64 gen_syncookie(void *data_end, struct 
bpf_sock *sk, + void *iph, __u32 ip_size, + struct tcphdr *tcph) +{ + __u32 thlen = tcph->doff * 4; + + if (tcph->syn && !tcph->ack) { + // packet should only have an MSS option + if (thlen != 24) + return 0; + + if ((void *)tcph + thlen > data_end) + return 0; + + return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen); + } + return 0; +} + static __always_inline void check_syncookie(void *ctx, void *data, void *data_end) { @@ -33,8 +52,10 @@ static __always_inline void check_syncookie(void *ctx, void *data, struct ipv6hdr *ipv6h; struct tcphdr *tcph; int ret; + __u32 key_mss = 2; + __u32 key_gen = 1; __u32 key = 0; - __u64 value = 1; + __s64 seq_mss; ethh = data; if (ethh + 1 > data_end) @@ -66,6 +87,9 @@ static __always_inline void check_syncookie(void *ctx, void *data, if (sk->state != BPF_TCP_LISTEN) goto release; + seq_mss = gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h), + tcph); + ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h), tcph, sizeof(*tcph)); break; @@ -95,6 +119,9 @@ static __always_inline void check_syncookie(void *ctx, void *data, if (sk->state != BPF_TCP_LISTEN) goto release; + seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h), + tcph); + ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h), tcph, sizeof(*tcph)); break; @@ -103,8 +130,19 @@ static __always_inline void check_syncookie(void *ctx, void *data, return; } - if (ret == 0) - bpf_map_update_elem(&results, &key, &value, 0); + if (seq_mss > 0) { + __u32 cookie = (__u32)seq_mss; + __u32 mss = seq_mss >> 32; + + bpf_map_update_elem(&results, &key_gen, &cookie, 0); + bpf_map_update_elem(&results, &key_mss, &mss, 0); + } + + if (ret == 0) { + __u32 cookie = bpf_ntohl(tcph->ack_seq) - 1; + + bpf_map_update_elem(&results, &key, &cookie, 0); + } release: bpf_sk_release(sk); diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c index c8c595da38d4..adc83a54c352 100644 --- 
a/tools/testing/selftests/bpf/progs/test_tcp_estats.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c @@ -36,9 +36,9 @@ #include <linux/ipv6.h> #include <linux/version.h> #include <sys/socket.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> -#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;}) +#define _(P) ({typeof(P) val = 0; bpf_probe_read_kernel(&val, sizeof(val), &P); val;}) #define TCP_ESTATS_MAGIC 0xBAADBEEF /* This test case needs "sock" and "pt_regs" data structure. diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index 2e233613d1fc..1f1966e86e9f 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -10,8 +10,8 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_tcpbpf.h" struct { @@ -131,6 +131,7 @@ int bpf_testcb(struct bpf_sock_ops *skops) g.bytes_received = skops->bytes_received; g.bytes_acked = skops->bytes_acked; } + g.num_close_events++; bpf_map_update_elem(&global_map, &key, &g, BPF_ANY); } diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 08346e7765d5..ac63410bb541 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -10,8 +10,8 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/tcp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_tcpnotify.h" struct { diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c index 04bf084517e0..4b825ee122cf 100644 --- 
a/tools/testing/selftests/bpf/progs/test_tracepoint.c +++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c @@ -2,7 +2,7 @@ // Copyright (c) 2017 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> /* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ struct sched_switch_args { diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c new file mode 100644 index 000000000000..e51e6e3a81c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdbool.h> +#include <stddef.h> +#include <linux/bpf.h> +#include "bpf_trace_helpers.h" + +struct task_struct; + +SEC("fentry/__set_task_comm") +int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec) +{ + return 0; +} + +SEC("fexit/__set_task_comm") +int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 504df69c83df..f48dbfe24ddc 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -19,8 +19,8 @@ #include <linux/socket.h> #include <linux/pkt_cls.h> #include <linux/erspan.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #define ERROR(ret) do {\ char fmt[] = "ERROR line:%d ret:%d\n";\ diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c index f3236ce35f31..d38153dab3dd 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include 
<linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define ATTR __attribute__((noinline)) #include "test_jhash.h" diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c index 9897150ed516..f024154c7be7 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define ATTR __always_inline #include "test_jhash.h" diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c index 1848da04ea41..9beb5bf80373 100644 --- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c +++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define ATTR __attribute__((noinline)) #include "test_jhash.h" diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c index 0941c655b07b..31f9bce37491 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp.c +++ b/tools/testing/selftests/bpf/progs/test_xdp.c @@ -16,8 +16,8 @@ #include <linux/tcp.h> #include <linux/pkt_cls.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c new file mode 100644 index 000000000000..cb8a04ab7a78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include 
<bpf/bpf_helpers.h> +#include "bpf_trace_helpers.h" + +struct net_device { + /* Structure does not need to contain all entries, + * as "preserve_access_index" will use BTF to fix this... + */ + int ifindex; +} __attribute__((preserve_access_index)); + +struct xdp_rxq_info { + /* Structure does not need to contain all entries, + * as "preserve_access_index" will use BTF to fix this... + */ + struct net_device *dev; + __u32 queue_index; +} __attribute__((preserve_access_index)); + +struct xdp_buff { + void *data; + void *data_end; + void *data_meta; + void *data_hard_start; + unsigned long handle; + struct xdp_rxq_info *rxq; +} __attribute__((preserve_access_index)); + +__u64 test_result_fentry = 0; +SEC("fentry/_xdp_tx_iptunnel") +int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) +{ + test_result_fentry = xdp->rxq->dev->ifindex; + return 0; +} + +__u64 test_result_fexit = 0; +SEC("fexit/_xdp_tx_iptunnel") +int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret) +{ + test_result_fexit = ret; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c index 97175f73c3fe..fcabcda30ba3 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c @@ -12,8 +12,8 @@ #include <linux/tcp.h> #include <linux/pkt_cls.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "test_iptunnel_common.h" int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 8d0182650653..a7c4a7d49fe6 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -2,7 +2,7 @@ #include <linux/if_ether.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> #define __round_mask(x, y) 
((__typeof__(x))((y) - 1)) #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index e88d7b9d65ab..8beecec166d9 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -13,8 +13,8 @@ #include <linux/icmpv6.h> #include <linux/tcp.h> #include <linux/udp.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> static __u32 rol32(__u32 word, unsigned int shift) { @@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } -static __attribute__ ((noinline)) +__attribute__ ((noinline)) u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; @@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -static __attribute__ ((noinline)) +__attribute__ ((noinline)) u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c index ef9e704be140..a5337cd9400b 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c @@ -10,7 +10,7 @@ * General Public License for more details. 
*/ #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c index 365a7d2d9f5c..134768f6b788 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c @@ -22,8 +22,8 @@ #include <linux/in.h> #include <linux/pkt_cls.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* linux/if_vlan.h have not exposed this as UAPI, thus mirror some here * diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c index 43b0ef1001ed..ea25e8881992 100644 --- a/tools/testing/selftests/bpf/progs/xdp_dummy.c +++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c @@ -2,7 +2,7 @@ #define KBUILD_MODNAME "xdp_dummy" #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("xdp_dummy") int xdp_dummy_prog(struct xdp_md *ctx) diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c index 1c5f298d7196..d037262c8937 100644 --- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c +++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> struct { __uint(type, BPF_MAP_TYPE_DEVMAP); diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c index 57912e7c94b0..94e6c2b281cb 100644 --- a/tools/testing/selftests/bpf/progs/xdp_tx.c +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/bpf.h> -#include "bpf_helpers.h" +#include <bpf/bpf_helpers.h> SEC("tx") int xdp_tx(struct xdp_md *xdp) diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c 
b/tools/testing/selftests/bpf/progs/xdping_kern.c index 112a2857f4e2..6b9ca40bd1f4 100644 --- a/tools/testing/selftests/bpf/progs/xdping_kern.c +++ b/tools/testing/selftests/bpf/progs/xdping_kern.c @@ -12,8 +12,8 @@ #include <linux/if_vlan.h> #include <linux/ip.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> #include "xdping.h" diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh new file mode 100755 index 000000000000..ac349a5cea7e --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + +case $1 in + -h|--help) + echo -e "$0 [-j <n>]" + echo -e "\tTest the different ways of building bpftool." + echo -e "" + echo -e "\tOptions:" + echo -e "\t\t-j <n>:\tPass -j flag to 'make'." + exit 0 + ;; +esac + +J=$* + +# Assume script is located under tools/testing/selftests/bpf/. We want to start +# build attempts from the top of kernel repository. +SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0) +SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH) +KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../) +cd $KDIR_ROOT_DIR +if [ ! -e tools/bpf/bpftool/Makefile ]; then + echo -e "skip: bpftool files not found!\n" + exit 0 +fi + +ERROR=0 +TMPDIR= + +# If one build fails, continue but return non-0 on exit. +return_value() { + if [ -d "$TMPDIR" ] ; then + rm -rf -- $TMPDIR + fi + exit $ERROR +} +trap return_value EXIT + +check() { + local dir=$(realpath $1) + + echo -n "binary: " + # Returns non-null if file is found (and "false" is run) + find $dir -type f -executable -name bpftool -print -exec false {} + && \ + ERROR=1 && printf "FAILURE: Did not find bpftool\n" +} + +make_and_clean() { + echo -e "\$PWD: $PWD" + echo -e "command: make -s $* >/dev/null" + make $J -s $* >/dev/null + if [ $? 
-ne 0 ] ; then + ERROR=1 + fi + if [ $# -ge 1 ] ; then + check ${@: -1} + else + check . + fi + ( + if [ $# -ge 1 ] ; then + cd ${@: -1} + fi + make -s clean + ) + echo +} + +make_with_tmpdir() { + local ARGS + + TMPDIR=$(mktemp -d) + if [ $# -ge 2 ] ; then + ARGS=${@:1:(($# - 1))} + fi + echo -e "\$PWD: $PWD" + echo -e "command: make -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null" + make $J -s $ARGS ${@: -1}=$TMPDIR/ >/dev/null + if [ $? -ne 0 ] ; then + ERROR=1 + fi + check $TMPDIR + rm -rf -- $TMPDIR + echo +} + +echo "Trying to build bpftool" +echo -e "... through kbuild\n" + +if [ -f ".config" ] ; then + make_and_clean tools/bpf + + ## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed + ## down from toplevel Makefile to bpftool's Makefile. + + # make_with_tmpdir tools/bpf OUTPUT + echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n" + + make_with_tmpdir tools/bpf O +else + echo -e "skip: make tools/bpf (no .config found)\n" + echo -e "skip: make tools/bpf OUTPUT=<dir> (not supported)\n" + echo -e "skip: make tools/bpf O=<dir> (no .config found)\n" +fi + +echo -e "... from kernel source tree\n" + +make_and_clean -C tools/bpf/bpftool + +make_with_tmpdir -C tools/bpf/bpftool OUTPUT + +make_with_tmpdir -C tools/bpf/bpftool O + +echo -e "... from tools/\n" +cd tools/ + +make_and_clean bpf + +## In tools/bpf/Makefile, function "descend" is called and passes $(O) and +## $(OUTPUT). We would like $(OUTPUT) to have "bpf/bpftool/" appended before +## calling bpftool's Makefile, but this is not the case as the "descend" +## function focuses on $(O)/$(subdir). However, in the present case, updating +## $(O) to have $(OUTPUT) recomputed from it in bpftool's Makefile does not +## work, because $(O) is not defined from command line and $(OUTPUT) is not +## updated in tools/scripts/Makefile.include. 
+## +## Workarounds would require to a) edit "descend" or use an alternative way to +## call bpftool's Makefile, b) modify the conditions to update $(OUTPUT) and +## other variables in tools/scripts/Makefile.include (at the risk of breaking +## the build of other tools), or c) append manually the "bpf/bpftool" suffix to +## $(OUTPUT) in bpf's Makefile, which may break if targets for other directories +## use "descend" in the future. + +# make_with_tmpdir bpf OUTPUT +echo -e "skip: make bpf OUTPUT=<dir> (not supported)\n" + +make_with_tmpdir bpf O + +echo -e "... from bpftool's dir\n" +cd bpf/bpftool + +make_and_clean + +make_with_tmpdir OUTPUT + +make_with_tmpdir O diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 3d617e806054..93040ca83e60 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -4148,10 +4148,6 @@ static int do_test_file(unsigned int test_num) if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) return PTR_ERR(obj); - err = bpf_object__btf_fd(obj); - if (CHECK(err == -1, "bpf_object__btf_fd: -1")) - goto done; - prog = bpf_program__next(NULL, obj); if (CHECK(!prog, "Cannot find bpf_prog")) { err = -1; diff --git a/tools/testing/selftests/bpf/test_btf_dump.c b/tools/testing/selftests/bpf/test_btf_dump.c deleted file mode 100644 index 8f850823d35f..000000000000 --- a/tools/testing/selftests/bpf/test_btf_dump.c +++ /dev/null @@ -1,143 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <errno.h> -#include <linux/err.h> -#include <btf.h> - -#define CHECK(condition, format...) 
({ \ - int __ret = !!(condition); \ - if (__ret) { \ - fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ - fprintf(stderr, format); \ - } \ - __ret; \ -}) - -void btf_dump_printf(void *ctx, const char *fmt, va_list args) -{ - vfprintf(ctx, fmt, args); -} - -struct btf_dump_test_case { - const char *name; - struct btf_dump_opts opts; -} btf_dump_test_cases[] = { - {.name = "btf_dump_test_case_syntax", .opts = {}}, - {.name = "btf_dump_test_case_ordering", .opts = {}}, - {.name = "btf_dump_test_case_padding", .opts = {}}, - {.name = "btf_dump_test_case_packing", .opts = {}}, - {.name = "btf_dump_test_case_bitfields", .opts = {}}, - {.name = "btf_dump_test_case_multidim", .opts = {}}, - {.name = "btf_dump_test_case_namespacing", .opts = {}}, -}; - -static int btf_dump_all_types(const struct btf *btf, - const struct btf_dump_opts *opts) -{ - size_t type_cnt = btf__get_nr_types(btf); - struct btf_dump *d; - int err = 0, id; - - d = btf_dump__new(btf, NULL, opts, btf_dump_printf); - if (IS_ERR(d)) - return PTR_ERR(d); - - for (id = 1; id <= type_cnt; id++) { - err = btf_dump__dump_type(d, id); - if (err) - goto done; - } - -done: - btf_dump__free(d); - return err; -} - -int test_btf_dump_case(int n, struct btf_dump_test_case *test_case) -{ - char test_file[256], out_file[256], diff_cmd[1024]; - struct btf *btf = NULL; - int err = 0, fd = -1; - FILE *f = NULL; - - fprintf(stderr, "Test case #%d (%s): ", n, test_case->name); - - snprintf(test_file, sizeof(test_file), "%s.o", test_case->name); - - btf = btf__parse_elf(test_file, NULL); - if (CHECK(IS_ERR(btf), - "failed to load test BTF: %ld\n", PTR_ERR(btf))) { - err = -PTR_ERR(btf); - btf = NULL; - goto done; - } - - snprintf(out_file, sizeof(out_file), - "/tmp/%s.output.XXXXXX", test_case->name); - fd = mkstemp(out_file); - if (CHECK(fd < 0, "failed to create temp output file: %d\n", fd)) { - err = fd; - goto done; - } - f = fdopen(fd, "w"); - if (CHECK(f == NULL, "failed to open temp output file: %s(%d)\n", - 
strerror(errno), errno)) { - close(fd); - goto done; - } - - test_case->opts.ctx = f; - err = btf_dump_all_types(btf, &test_case->opts); - fclose(f); - close(fd); - if (CHECK(err, "failure during C dumping: %d\n", err)) { - goto done; - } - - snprintf(test_file, sizeof(test_file), "progs/%s.c", test_case->name); - /* - * Diff test output and expected test output, contained between - * START-EXPECTED-OUTPUT and END-EXPECTED-OUTPUT lines in test case. - * For expected output lines, everything before '*' is stripped out. - * Also lines containing comment start and comment end markers are - * ignored. - */ - snprintf(diff_cmd, sizeof(diff_cmd), - "awk '/START-EXPECTED-OUTPUT/{out=1;next} " - "/END-EXPECTED-OUTPUT/{out=0} " - "/\\/\\*|\\*\\//{next} " /* ignore comment start/end lines */ - "out {sub(/^[ \\t]*\\*/, \"\"); print}' '%s' | diff -u - '%s'", - test_file, out_file); - err = system(diff_cmd); - if (CHECK(err, - "differing test output, output=%s, err=%d, diff cmd:\n%s\n", - out_file, err, diff_cmd)) - goto done; - - remove(out_file); - fprintf(stderr, "OK\n"); - -done: - btf__free(btf); - return err; -} - -int main() { - int test_case_cnt, i, err, failed = 0; - - test_case_cnt = sizeof(btf_dump_test_cases) / - sizeof(btf_dump_test_cases[0]); - - for (i = 0; i < test_case_cnt; i++) { - err = test_btf_dump_case(i, &btf_dump_test_cases[i]); - if (err) - failed++; - } - - fprintf(stderr, "%d tests succeeded, %d tests failed.\n", - test_case_cnt - failed, failed); - - return failed; -} diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c deleted file mode 100644 index 7671909ee1cb..000000000000 --- a/tools/testing/selftests/bpf/test_cgroup_attach.c +++ /dev/null @@ -1,571 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* eBPF example program: - * - * - Creates arraymap in kernel with 4 bytes keys and 8 byte values - * - * - Loads eBPF program - * - * The eBPF program accesses the map passed in to store two 
pieces of - * information. The number of invocations of the program, which maps - * to the number of packets received, is stored to key 0. Key 1 is - * incremented on each iteration by the number of bytes stored in - * the skb. The program also stores the number of received bytes - * in the cgroup storage. - * - * - Attaches the new program to a cgroup using BPF_PROG_ATTACH - * - * - Every second, reads map[0] and map[1] to see how many bytes and - * packets were seen on any socket of tasks in the given cgroup. - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <sys/resource.h> -#include <sys/time.h> -#include <unistd.h> -#include <linux/filter.h> - -#include <linux/bpf.h> -#include <bpf/bpf.h> - -#include "bpf_util.h" -#include "bpf_rlimit.h" -#include "cgroup_helpers.h" - -#define FOO "/foo" -#define BAR "/foo/bar/" -#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" - -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - -#ifdef DEBUG -#define debug(args...) printf(args) -#else -#define debug(args...) 
-#endif - -static int prog_load(int verdict) -{ - int ret; - struct bpf_insn prog[] = { - BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ - BPF_EXIT_INSN(), - }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - - ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, - prog, insns_cnt, "GPL", 0, - bpf_log_buf, BPF_LOG_BUF_SIZE); - - if (ret < 0) { - log_err("Loading program"); - printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); - return 0; - } - return ret; -} - -static int test_foo_bar(void) -{ - int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0; - - allow_prog = prog_load(1); - if (!allow_prog) - goto err; - - drop_prog = prog_load(0); - if (!drop_prog) - goto err; - - if (setup_cgroup_environment()) - goto err; - - /* Create cgroup /foo, get fd, and join it */ - foo = create_and_get_cgroup(FOO); - if (foo < 0) - goto err; - - if (join_cgroup(FOO)) - goto err; - - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo"); - goto err; - } - - debug("Attached DROP prog. This ping in cgroup /foo should fail...\n"); - assert(system(PING_CMD) != 0); - - /* Create cgroup /foo/bar, get fd, and join it */ - bar = create_and_get_cgroup(BAR); - if (bar < 0) - goto err; - - if (join_cgroup(BAR)) - goto err; - - debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); - assert(system(PING_CMD) != 0); - - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo/bar"); - goto err; - } - - debug("Attached PASS prog. 
This ping in cgroup /foo/bar should pass...\n"); - assert(system(PING_CMD) == 0); - - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching program from /foo/bar"); - goto err; - } - - debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n" - "This ping in cgroup /foo/bar should fail...\n"); - assert(system(PING_CMD) != 0); - - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo/bar"); - goto err; - } - - if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching program from /foo"); - goto err; - } - - debug("Attached PASS from /foo/bar and detached DROP from /foo.\n" - "This ping in cgroup /foo/bar should pass...\n"); - assert(system(PING_CMD) == 0); - - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo/bar"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { - errno = 0; - log_err("Unexpected success attaching prog to /foo/bar"); - goto err; - } - - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching program from /foo/bar"); - goto err; - } - - if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { - errno = 0; - log_err("Unexpected success in double detach from /foo"); - goto err; - } - - if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { - log_err("Attaching non-overridable prog to /foo"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { - errno = 0; - log_err("Unexpected success attaching non-overridable prog to /foo/bar"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - errno = 0; - log_err("Unexpected success attaching overridable prog to /foo/bar"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - errno = 0; - log_err("Unexpected success 
attaching overridable prog to /foo"); - goto err; - } - - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { - log_err("Attaching different non-overridable prog to /foo"); - goto err; - } - - goto out; - -err: - rc = 1; - -out: - close(foo); - close(bar); - cleanup_cgroup_environment(); - if (!rc) - printf("#override:PASS\n"); - else - printf("#override:FAIL\n"); - return rc; -} - -static int map_fd = -1; - -static int prog_load_cnt(int verdict, int val) -{ - int cgroup_storage_fd, percpu_cgroup_storage_fd; - - if (map_fd < 0) - map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); - if (map_fd < 0) { - printf("failed to create map '%s'\n", strerror(errno)); - return -1; - } - - cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, - sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); - if (cgroup_storage_fd < 0) { - printf("failed to create map '%s'\n", strerror(errno)); - return -1; - } - - percpu_cgroup_storage_fd = bpf_create_map( - BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, - sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); - if (percpu_cgroup_storage_fd < 0) { - printf("failed to create map '%s'\n", strerror(errno)); - return -1; - } - - struct bpf_insn prog[] = { - BPF_MOV32_IMM(BPF_REG_0, 0), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ - BPF_LD_MAP_FD(BPF_REG_1, map_fd), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ - - BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_MOV64_IMM(BPF_REG_1, val), - BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), - - BPF_LD_MAP_FD(BPF_REG_1, 
percpu_cgroup_storage_fd), - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), - BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), - - BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ - BPF_EXIT_INSN(), - }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - int ret; - - ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, - prog, insns_cnt, "GPL", 0, - bpf_log_buf, BPF_LOG_BUF_SIZE); - - if (ret < 0) { - log_err("Loading program"); - printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); - return 0; - } - close(cgroup_storage_fd); - return ret; -} - - -static int test_multiprog(void) -{ - __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; - int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; - int drop_prog, allow_prog[6] = {}, rc = 0; - unsigned long long value; - int i = 0; - - for (i = 0; i < 6; i++) { - allow_prog[i] = prog_load_cnt(1, 1 << i); - if (!allow_prog[i]) - goto err; - } - drop_prog = prog_load_cnt(0, 1); - if (!drop_prog) - goto err; - - if (setup_cgroup_environment()) - goto err; - - cg1 = create_and_get_cgroup("/cg1"); - if (cg1 < 0) - goto err; - cg2 = create_and_get_cgroup("/cg1/cg2"); - if (cg2 < 0) - goto err; - cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); - if (cg3 < 0) - goto err; - cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4"); - if (cg4 < 0) - goto err; - cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); - if (cg5 < 0) - goto err; - - if (join_cgroup("/cg1/cg2/cg3/cg4/cg5")) - goto err; - - if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_MULTI)) { - log_err("Attaching prog to cg1"); - goto err; - } - if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_MULTI)) { - log_err("Unexpected success attaching the same prog to cg1"); - goto err; - } - if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, - 
BPF_F_ALLOW_MULTI)) { - log_err("Attaching prog2 to cg1"); - goto err; - } - if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to cg2"); - goto err; - } - if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_MULTI)) { - log_err("Attaching prog to cg3"); - goto err; - } - if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to cg4"); - goto err; - } - if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) { - log_err("Attaching prog to cg5"); - goto err; - } - assert(system(PING_CMD) == 0); - assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); - assert(value == 1 + 2 + 8 + 32); - - /* query the number of effective progs in cg5 */ - assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, - NULL, NULL, &prog_cnt) == 0); - assert(prog_cnt == 4); - /* retrieve prog_ids of effective progs in cg5 */ - assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, - &attach_flags, prog_ids, &prog_cnt) == 0); - assert(prog_cnt == 4); - assert(attach_flags == 0); - saved_prog_id = prog_ids[0]; - /* check enospc handling */ - prog_ids[0] = 0; - prog_cnt = 2; - assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, - &attach_flags, prog_ids, &prog_cnt) == -1 && - errno == ENOSPC); - assert(prog_cnt == 4); - /* check that prog_ids are returned even when buffer is too small */ - assert(prog_ids[0] == saved_prog_id); - /* retrieve prog_id of single attached prog in cg5 */ - prog_ids[0] = 0; - assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, - NULL, prog_ids, &prog_cnt) == 0); - assert(prog_cnt == 1); - assert(prog_ids[0] == saved_prog_id); - - /* detach bottom program and ping again */ - if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching prog from cg5"); - goto err; - } - value = 0; - assert(bpf_map_update_elem(map_fd, &key, 
&value, 0) == 0); - assert(system(PING_CMD) == 0); - assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); - assert(value == 1 + 2 + 8 + 16); - - /* detach 3rd from bottom program and ping again */ - errno = 0; - if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) { - log_err("Unexpected success on detach from cg3"); - goto err; - } - if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching from cg3"); - goto err; - } - value = 0; - assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); - assert(system(PING_CMD) == 0); - assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); - assert(value == 1 + 2 + 16); - - /* detach 2nd from bottom program and ping again */ - if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching prog from cg4"); - goto err; - } - value = 0; - assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0); - assert(system(PING_CMD) == 0); - assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0); - assert(value == 1 + 2 + 4); - - prog_cnt = 4; - assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE, - &attach_flags, prog_ids, &prog_cnt) == 0); - assert(prog_cnt == 3); - assert(attach_flags == 0); - assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, - NULL, prog_ids, &prog_cnt) == 0); - assert(prog_cnt == 0); - goto out; -err: - rc = 1; - -out: - for (i = 0; i < 6; i++) - if (allow_prog[i] > 0) - close(allow_prog[i]); - close(cg1); - close(cg2); - close(cg3); - close(cg4); - close(cg5); - cleanup_cgroup_environment(); - if (!rc) - printf("#multi:PASS\n"); - else - printf("#multi:FAIL\n"); - return rc; -} - -static int test_autodetach(void) -{ - __u32 prog_cnt = 4, attach_flags; - int allow_prog[2] = {0}; - __u32 prog_ids[2] = {0}; - int cg = 0, i, rc = -1; - void *ptr = NULL; - int attempts; - - for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { - allow_prog[i] = prog_load_cnt(1, 1 << i); - if (!allow_prog[i]) - goto err; - } - - if (setup_cgroup_environment()) - goto 
err; - - /* create a cgroup, attach two programs and remember their ids */ - cg = create_and_get_cgroup("/cg_autodetach"); - if (cg < 0) - goto err; - - if (join_cgroup("/cg_autodetach")) - goto err; - - for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { - if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_MULTI)) { - log_err("Attaching prog[%d] to cg:egress", i); - goto err; - } - } - - /* make sure that programs are attached and run some traffic */ - assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags, - prog_ids, &prog_cnt) == 0); - assert(system(PING_CMD) == 0); - - /* allocate some memory (4Mb) to pin the original cgroup */ - ptr = malloc(4 * (1 << 20)); - if (!ptr) - goto err; - - /* close programs and cgroup fd */ - for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { - close(allow_prog[i]); - allow_prog[i] = 0; - } - - close(cg); - cg = 0; - - /* leave the cgroup and remove it. don't detach programs */ - cleanup_cgroup_environment(); - - /* wait for the asynchronous auto-detachment. - * wait for no more than 5 sec and give up. - */ - for (i = 0; i < ARRAY_SIZE(prog_ids); i++) { - for (attempts = 5; attempts >= 0; attempts--) { - int fd = bpf_prog_get_fd_by_id(prog_ids[i]); - - if (fd < 0) - break; - - /* don't leave the fd open */ - close(fd); - - if (!attempts) - goto err; - - sleep(1); - } - } - - rc = 0; -err: - for (i = 0; i < ARRAY_SIZE(allow_prog); i++) - if (allow_prog[i] > 0) - close(allow_prog[i]); - if (cg) - close(cg); - free(ptr); - cleanup_cgroup_environment(); - if (!rc) - printf("#autodetach:PASS\n"); - else - printf("#autodetach:FAIL\n"); - return rc; -} - -int main(void) -{ - int (*tests[])(void) = { - test_foo_bar, - test_multiprog, - test_autodetach, - }; - int errors = 0; - int i; - - for (i = 0; i < ARRAY_SIZE(tests); i++) - if (tests[i]()) - errors++; - - if (errors) - printf("test_cgroup_attach:FAIL\n"); - else - printf("test_cgroup_attach:PASS\n"); - - return errors ? 
EXIT_FAILURE : EXIT_SUCCESS; -} diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c index 2fc4625c1a15..655729004391 100644 --- a/tools/testing/selftests/bpf/test_cgroup_storage.c +++ b/tools/testing/selftests/bpf/test_cgroup_storage.c @@ -20,9 +20,9 @@ int main(int argc, char **argv) BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), - BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), - BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */ @@ -30,7 +30,7 @@ int main(int argc, char **argv) BPF_FUNC_get_local_storage), BPF_MOV64_IMM(BPF_REG_1, 1), BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1), BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp new file mode 100644 index 000000000000..a8d2e9a87fbf --- /dev/null +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#include <iostream> +#include <bpf/libbpf.h> +#include <bpf/bpf.h> +#include <bpf/btf.h> +#include "test_core_extern.skel.h" + +/* do nothing, just make sure we can link successfully */ + +int main(int argc, char *argv[]) +{ + struct test_core_extern *skel; + + /* libbpf.h */ + libbpf_set_print(NULL); + + /* bpf.h */ + bpf_prog_get_fd_by_id(0); + + /* btf.h */ + btf__new(NULL, 0); + + /* BPF skeleton */ + skel = test_core_extern__open_and_load(); + test_core_extern__destroy(skel); + + std::cout << "DONE!" 
<< std::endl; + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index d23d4da66b83..174b72a64a4c 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -18,19 +18,55 @@ fi # this is the case and run it with in_netns.sh if it is being run in the root # namespace. if [[ -z $(ip netns identify $$) ]]; then + err=0 + if bpftool="$(which bpftool)"; then + echo "Testing global flow dissector..." + + $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + type flow_dissector + + if ! unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected unsuccessful attach in namespace" >&2 + err=1 + fi + + $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + flow_dissector + + if unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected successful attach in namespace" >&2 + err=1 + fi + + if ! $bpftool prog detach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Failed to detach flow dissector" >&2 + err=1 + fi + + rm -rf /sys/fs/bpf/flow + else + echo "Skipping root flow dissector test, bpftool not found" >&2 + fi + + # Run the rest of the tests in a net namespace. ../net/in_netns.sh "$0" "$@" - exit $? -fi + err=$(( $err + $? )) -# Determine selftest success via shell exit code -exit_handler() -{ - if (( $? == 0 )); then + if (( $err == 0 )); then echo "selftests: $TESTNAME [PASS]"; else echo "selftests: $TESTNAME [FAILED]"; fi + exit $err +fi + +# Determine selftest success via shell exit code +exit_handler() +{ set +e # Cleanup @@ -63,6 +99,9 @@ fi # Setup tc qdisc add dev lo ingress +echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter +echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter +echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter echo "Testing IPv4..." 
# Drops all IP/UDP packets coming from port 9 @@ -100,6 +139,20 @@ echo "Testing IPv4 + GRE..." tc filter del dev lo ingress pref 1337 +echo "Testing port range..." +# Drops all IP/UDP packets coming from port 8-10 +tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \ + udp src_port 8-10 action drop + +# Send 10 IPv4/UDP packets from port 7. Filter should not drop any. +./test_flow_dissector -i 4 -f 7 +# Send 10 IPv4/UDP packets from port 9. Filter should drop all. +./test_flow_dissector -i 4 -f 9 -F +# Send 10 IPv4/UDP packets from port 11. Filter should not drop any. +./test_flow_dissector -i 4 -f 11 + +tc filter del dev lo ingress pref 1337 + echo "Testing IPv6..." # Drops all IPv6/UDP packets coming from port 9 tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \ diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh new file mode 100755 index 000000000000..20de7bb873bc --- /dev/null +++ b/tools/testing/selftests/bpf/test_ftrace.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +TR=/sys/kernel/debug/tracing/ +clear_trace() { # reset trace output + echo > $TR/trace +} + +disable_tracing() { # stop trace recording + echo 0 > $TR/tracing_on +} + +enable_tracing() { # start trace recording + echo 1 > $TR/tracing_on +} + +reset_tracer() { # reset the current tracer + echo nop > $TR/current_tracer +} + +disable_tracing +clear_trace + +echo "" > $TR/set_ftrace_filter +echo '*printk* *console* *wake* *serial* *lock*' > $TR/set_ftrace_notrace + +echo "bpf_prog_test*" > $TR/set_graph_function +echo "" > $TR/set_graph_notrace + +echo function_graph > $TR/current_tracer + +enable_tracing +./test_progs -t fentry +./test_progs -t fexit +disable_tracing +clear_trace + +reset_tracer + +exit 0 diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c index b64094c981e3..c490e012c23f 100644 --- a/tools/testing/selftests/bpf/test_hashmap.c +++ 
b/tools/testing/selftests/bpf/test_hashmap.c @@ -8,7 +8,7 @@ #include <stdio.h> #include <errno.h> #include <linux/err.h> -#include "hashmap.h" +#include "bpf/hashmap.h" #define CHECK(condition, format...) ({ \ int __ret = !!(condition); \ diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh deleted file mode 100755 index 2989b2e2d856..000000000000 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -export TESTNAME=test_libbpf - -# Determine selftest success via shell exit code -exit_handler() -{ - if [ $? -eq 0 ]; then - echo "selftests: $TESTNAME [PASS]"; - else - echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 - echo "selftests: $TESTNAME [FAILED]"; - fi -} - -libbpf_open_file() -{ - LAST_LOADED=$1 - if [ -n "$VERBOSE" ]; then - ./test_libbpf_open $1 - else - ./test_libbpf_open --quiet $1 - fi -} - -# Exit script immediately (well catched by trap handler) if any -# program/thing exits with a non-zero status. -set -e - -# (Use 'trap -l' to list meaning of numbers) -trap exit_handler 0 2 3 6 9 - -libbpf_open_file test_l4lb.o - -# Load a program with BPF-to-BPF calls -libbpf_open_file test_l4lb_noinline.o - -# Load a program compiled without the "-target bpf" flag -libbpf_open_file test_xdp.o - -# Success -exit 0 diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c deleted file mode 100644 index 9e9db202d218..000000000000 --- a/tools/testing/selftests/bpf/test_libbpf_open.c +++ /dev/null @@ -1,144 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. 
- */ -static const char *__doc__ = - "Libbpf test program for loading BPF ELF object files"; - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <stdarg.h> -#include <bpf/libbpf.h> -#include <getopt.h> - -#include "bpf_rlimit.h" - -static const struct option long_options[] = { - {"help", no_argument, NULL, 'h' }, - {"debug", no_argument, NULL, 'D' }, - {"quiet", no_argument, NULL, 'q' }, - {0, 0, NULL, 0 } -}; - -static void usage(char *argv[]) -{ - int i; - - printf("\nDOCUMENTATION:\n%s\n\n", __doc__); - printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]); - printf(" Listing options:\n"); - for (i = 0; long_options[i].name != 0; i++) { - printf(" --%-12s", long_options[i].name); - printf(" short-option: -%c", - long_options[i].val); - printf("\n"); - } - printf("\n"); -} - -static bool debug = 0; -static int libbpf_debug_print(enum libbpf_print_level level, - const char *fmt, va_list args) -{ - if (level == LIBBPF_DEBUG && !debug) - return 0; - - fprintf(stderr, "[%d] ", level); - return vfprintf(stderr, fmt, args); -} - -#define EXIT_FAIL_LIBBPF EXIT_FAILURE -#define EXIT_FAIL_OPTION 2 - -int test_walk_progs(struct bpf_object *obj, bool verbose) -{ - struct bpf_program *prog; - int cnt = 0; - - bpf_object__for_each_program(prog, obj) { - cnt++; - if (verbose) - printf("Prog (count:%d) section_name: %s\n", cnt, - bpf_program__title(prog, false)); - } - return 0; -} - -int test_walk_maps(struct bpf_object *obj, bool verbose) -{ - struct bpf_map *map; - int cnt = 0; - - bpf_object__for_each_map(map, obj) { - cnt++; - if (verbose) - printf("Map (count:%d) name: %s\n", cnt, - bpf_map__name(map)); - } - return 0; -} - -int test_open_file(char *filename, bool verbose) -{ - struct bpf_object *bpfobj = NULL; - long err; - - if (verbose) - printf("Open BPF ELF-file with libbpf: %s\n", filename); - - /* Load BPF ELF object file and check for errors */ - bpfobj = bpf_object__open(filename); - err = libbpf_get_error(bpfobj); - if (err) { - char 
err_buf[128]; - libbpf_strerror(err, err_buf, sizeof(err_buf)); - if (verbose) - printf("Unable to load eBPF objects in file '%s': %s\n", - filename, err_buf); - return EXIT_FAIL_LIBBPF; - } - test_walk_progs(bpfobj, verbose); - test_walk_maps(bpfobj, verbose); - - if (verbose) - printf("Close BPF ELF-file with libbpf: %s\n", - bpf_object__name(bpfobj)); - bpf_object__close(bpfobj); - - return 0; -} - -int main(int argc, char **argv) -{ - char filename[1024] = { 0 }; - bool verbose = 1; - int longindex = 0; - int opt; - - libbpf_set_print(libbpf_debug_print); - - /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hDq", - long_options, &longindex)) != -1) { - switch (opt) { - case 'D': - debug = 1; - break; - case 'q': /* Use in scripting mode */ - verbose = 0; - break; - case 'h': - default: - usage(argv); - return EXIT_FAIL_OPTION; - } - } - if (optind >= argc) { - usage(argv); - printf("ERROR: Expected BPF_FILE argument after options\n"); - return EXIT_FAIL_OPTION; - } - snprintf(filename, sizeof(filename), "%s", argv[optind]); - - return test_open_file(filename, verbose); -} diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index acf7a74f97cd..59ea56945e6c 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -314,15 +314,15 @@ test_gso() command -v nc >/dev/null 2>&1 || \ { echo >&2 "nc is not available: skipping TSO tests"; return; } - # listen on IPv*_DST, capture TCP into $TMPFILE + # listen on port 9000, capture TCP into $TMPFILE if [ "${PROTO}" == "IPv4" ] ; then IP_DST=${IPv4_DST} ip netns exec ${NS3} bash -c \ - "nc -4 -l -s ${IPv4_DST} -p 9000 > ${TMPFILE} &" + "nc -4 -l -p 9000 > ${TMPFILE} &" elif [ "${PROTO}" == "IPv6" ] ; then IP_DST=${IPv6_DST} ip netns exec ${NS3} bash -c \ - "nc -6 -l -s ${IPv6_DST} -p 9000 > ${TMPFILE} &" + "nc -6 -l -p 9000 > ${TMPFILE} &" RET=$? 
else echo " test_gso: unknown PROTO: ${PROTO}" diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 5443b9bd75ed..02eae1e864c2 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -508,6 +508,21 @@ static void test_devmap(unsigned int task, void *data) close(fd); } +static void test_devmap_hash(unsigned int task, void *data) +{ + int fd; + __u32 key, value; + + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create devmap_hash '%s'!\n", strerror(errno)); + exit(1); + } + + close(fd); +} + static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; @@ -1127,7 +1142,6 @@ out_sockmap: #define MAPINMAP_PROG "./test_map_in_map.o" static void test_map_in_map(void) { - struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; int mim_fd, fd, err; @@ -1164,9 +1178,6 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__for_each_program(prog, obj) { - bpf_program__set_xdp(prog); - } bpf_object__load(obj); map = bpf_object__find_map_by_name(obj, "mim_array"); @@ -1684,6 +1695,7 @@ static void run_all_tests(void) test_arraymap_percpu_many_keys(); test_devmap(0, NULL); + test_devmap_hash(0, NULL); test_sockmap(0, NULL); test_map_large(); @@ -1701,9 +1713,9 @@ static void run_all_tests(void) test_map_in_map(); } -#define DECLARE +#define DEFINE_TEST(name) extern void test_##name(void); #include <map_tests/tests.h> -#undef DECLARE +#undef DEFINE_TEST int main(void) { @@ -1715,9 +1727,9 @@ int main(void) map_flags = BPF_F_NO_PREALLOC; run_all_tests(); -#define CALL +#define DEFINE_TEST(name) test_##name(); #include <map_tests/tests.h> -#undef CALL +#undef DEFINE_TEST printf("test_maps: OK, %d SKIPPED\n", skips); return 0; diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 425f9ed27c3b..8294ae3ffb3c 
100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -22,6 +22,7 @@ import os import pprint import random import re +import stat import string import struct import subprocess @@ -311,7 +312,11 @@ class DebugfsDir: for f in out.split(): if f == "ports": continue + p = os.path.join(path, f) + if not os.stat(p).st_mode & stat.S_IRUSR: + continue + if os.path.isfile(p): _, out = cmd('cat %s/%s' % (path, f)) dfs[f] = out.strip() @@ -330,13 +335,22 @@ class NetdevSimDev: """ Class for netdevsim bus device and its attributes. """ + @staticmethod + def ctrl_write(path, val): + fullpath = os.path.join("/sys/bus/netdevsim/", path) + try: + with open(fullpath, "w") as f: + f.write(val) + except OSError as e: + log("WRITE %s: %r" % (fullpath, val), -e.errno) + raise e + log("WRITE %s: %r" % (fullpath, val), 0) def __init__(self, port_count=1): addr = 0 while True: try: - with open("/sys/bus/netdevsim/new_device", "w") as f: - f.write("%u %u" % (addr, port_count)) + self.ctrl_write("new_device", "%u %u" % (addr, port_count)) except OSError as e: if e.errno == errno.ENOSPC: addr += 1 @@ -398,14 +412,13 @@ class NetdevSimDev: return progs def remove(self): - with open("/sys/bus/netdevsim/del_device", "w") as f: - f.write("%u" % self.addr) + self.ctrl_write("del_device", "%u" % (self.addr, )) devs.remove(self) def remove_nsim(self, nsim): self.nsims.remove(nsim) - with open("/sys/bus/netdevsim/devices/netdevsim%u/del_port" % self.addr ,"w") as f: - f.write("%u" % nsim.port_index) + self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ), + "%u" % (nsim.port_index, )) class NetdevSim: """ @@ -1353,7 +1366,7 @@ try: bpftool_prog_list_wait(expected=1) ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"] - fail(ifnameB != simB1['ifname'], "program not bound to originial device") + fail(ifnameB != simB1['ifname'], "program not bound to original device") simB1.remove() bpftool_prog_list_wait(expected=1) diff 
--git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index dae0819b1141..bab1e6f1d8f1 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -2,11 +2,176 @@ /* Copyright (c) 2017 Facebook */ #include "test_progs.h" +#include "cgroup_helpers.h" #include "bpf_rlimit.h" +#include <argp.h> +#include <string.h> -int error_cnt, pass_cnt; -bool jit_enabled; -bool verifier_stats = false; +/* defined in test_progs.h */ +struct test_env env = {}; + +struct prog_test_def { + const char *test_name; + int test_num; + void (*run_test)(void); + bool force_log; + int error_cnt; + int skip_cnt; + bool tested; + bool need_cgroup_cleanup; + + char *subtest_name; + int subtest_num; + + /* store counts before subtest started */ + int old_error_cnt; +}; + +static bool should_run(struct test_selector *sel, int num, const char *name) +{ + int i; + + for (i = 0; i < sel->blacklist.cnt; i++) { + if (strstr(name, sel->blacklist.strs[i])) + return false; + } + + for (i = 0; i < sel->whitelist.cnt; i++) { + if (strstr(name, sel->whitelist.strs[i])) + return true; + } + + if (!sel->whitelist.cnt && !sel->num_set) + return true; + + return num < sel->num_set_len && sel->num_set[num]; +} + +static void dump_test_log(const struct prog_test_def *test, bool failed) +{ + if (stdout == env.stdout) + return; + + fflush(stdout); /* exports env.log_buf & env.log_cnt */ + + if (env.verbosity > VERBOSE_NONE || test->force_log || failed) { + if (env.log_cnt) { + env.log_buf[env.log_cnt] = '\0'; + fprintf(env.stdout, "%s", env.log_buf); + if (env.log_buf[env.log_cnt - 1] != '\n') + fprintf(env.stdout, "\n"); + } + } + + fseeko(stdout, 0, SEEK_SET); /* rewind */ +} + +static void skip_account(void) +{ + if (env.test->skip_cnt) { + env.skip_cnt++; + env.test->skip_cnt = 0; + } +} + +void test__end_subtest() +{ + struct prog_test_def *test = env.test; + int sub_error_cnt = test->error_cnt - test->old_error_cnt; + + if 
(sub_error_cnt) + env.fail_cnt++; + else + env.sub_succ_cnt++; + skip_account(); + + dump_test_log(test, sub_error_cnt); + + fprintf(env.stdout, "#%d/%d %s:%s\n", + test->test_num, test->subtest_num, + test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); + + free(test->subtest_name); + test->subtest_name = NULL; +} + +bool test__start_subtest(const char *name) +{ + struct prog_test_def *test = env.test; + + if (test->subtest_name) + test__end_subtest(); + + test->subtest_num++; + + if (!name || !name[0]) { + fprintf(env.stderr, + "Subtest #%d didn't provide sub-test name!\n", + test->subtest_num); + return false; + } + + if (!should_run(&env.subtest_selector, test->subtest_num, name)) + return false; + + test->subtest_name = strdup(name); + if (!test->subtest_name) { + fprintf(env.stderr, + "Subtest #%d: failed to copy subtest name!\n", + test->subtest_num); + return false; + } + env.test->old_error_cnt = env.test->error_cnt; + + return true; +} + +void test__force_log() { + env.test->force_log = true; +} + +void test__skip(void) +{ + env.test->skip_cnt++; +} + +void test__fail(void) +{ + env.test->error_cnt++; +} + +int test__join_cgroup(const char *path) +{ + int fd; + + if (!env.test->need_cgroup_cleanup) { + if (setup_cgroup_environment()) { + fprintf(stderr, + "#%d %s: Failed to setup cgroup environment\n", + env.test->test_num, env.test->test_name); + return -1; + } + + env.test->need_cgroup_cleanup = true; + } + + fd = create_and_get_cgroup(path); + if (fd < 0) { + fprintf(stderr, + "#%d %s: Failed to create cgroup '%s' (errno=%d)\n", + env.test->test_num, env.test->test_name, path, errno); + return fd; + } + + if (join_cgroup(path)) { + fprintf(stderr, + "#%d %s: Failed to join cgroup '%s' (errno=%d)\n", + env.test->test_num, env.test->test_name, path, errno); + return -1; + } + + return fd; +} struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), @@ -32,7 +197,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char 
*name) map = bpf_object__find_map_by_name(obj, name); if (!map) { printf("%s:FAIL:map '%s' not found\n", test, name); - error_cnt++; + test__fail(); return -1; } return bpf_map__fd(map); @@ -156,23 +321,367 @@ void *spin_lock_thread(void *arg) pthread_exit(arg); } -#define DECLARE +/* extern declarations for test funcs */ +#define DEFINE_TEST(name) extern void test_##name(void); #include <prog_tests/tests.h> -#undef DECLARE +#undef DEFINE_TEST + +static struct prog_test_def prog_test_defs[] = { +#define DEFINE_TEST(name) { \ + .test_name = #name, \ + .run_test = &test_##name, \ +}, +#include <prog_tests/tests.h> +#undef DEFINE_TEST +}; +const int prog_test_cnt = ARRAY_SIZE(prog_test_defs); + +const char *argp_program_version = "test_progs 0.1"; +const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; +const char argp_program_doc[] = "BPF selftests test runner"; + +enum ARG_KEYS { + ARG_TEST_NUM = 'n', + ARG_TEST_NAME = 't', + ARG_TEST_NAME_BLACKLIST = 'b', + ARG_VERIFIER_STATS = 's', + ARG_VERBOSE = 'v', +}; + +static const struct argp_option opts[] = { + { "num", ARG_TEST_NUM, "NUM", 0, + "Run test number NUM only " }, + { "name", ARG_TEST_NAME, "NAMES", 0, + "Run tests with names containing any string from NAMES list" }, + { "name-blacklist", ARG_TEST_NAME_BLACKLIST, "NAMES", 0, + "Don't run tests with names containing any string from NAMES list" }, + { "verifier-stats", ARG_VERIFIER_STATS, NULL, 0, + "Output verifier statistics", }, + { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, + "Verbose output (use -vv or -vvv for progressively verbose output)" }, + {}, +}; -int main(int ac, char **av) +static int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) { + if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) + return 0; + vprintf(format, args); + return 0; +} + +static int parse_str_list(const char *s, struct str_set *set) +{ + char *input, *state = NULL, *next, **tmp, **strs = NULL; + int cnt = 0; + + input 
= strdup(s); + if (!input) + return -ENOMEM; + + set->cnt = 0; + set->strs = NULL; + + while ((next = strtok_r(state ? NULL : input, ",", &state))) { + tmp = realloc(strs, sizeof(*strs) * (cnt + 1)); + if (!tmp) + goto err; + strs = tmp; + + strs[cnt] = strdup(next); + if (!strs[cnt]) + goto err; + + cnt++; + } + + set->cnt = cnt; + set->strs = (const char **)strs; + free(input); + return 0; +err: + free(strs); + free(input); + return -ENOMEM; +} + +int parse_num_list(const char *s, struct test_selector *sel) +{ + int i, set_len = 0, num, start = 0, end = -1; + bool *set = NULL, *tmp, parsing_end = false; + char *next; + + while (s[0]) { + errno = 0; + num = strtol(s, &next, 10); + if (errno) + return -errno; + + if (parsing_end) + end = num; + else + start = num; + + if (!parsing_end && *next == '-') { + s = next + 1; + parsing_end = true; + continue; + } else if (*next == ',') { + parsing_end = false; + s = next + 1; + end = num; + } else if (*next == '\0') { + parsing_end = false; + s = next; + end = num; + } else { + return -EINVAL; + } + + if (start > end) + return -EINVAL; + + if (end + 1 > set_len) { + set_len = end + 1; + tmp = realloc(set, set_len); + if (!tmp) { + free(set); + return -ENOMEM; + } + set = tmp; + } + for (i = start; i <= end; i++) { + set[i] = true; + } + + } + + if (!set) + return -EINVAL; + + sel->num_set = set; + sel->num_set_len = set_len; + + return 0; +} + +extern int extra_prog_load_log_flags; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + struct test_env *env = state->input; + + switch (key) { + case ARG_TEST_NUM: { + char *subtest_str = strchr(arg, '/'); + + if (subtest_str) { + *subtest_str = '\0'; + if (parse_num_list(subtest_str + 1, + &env->subtest_selector)) { + fprintf(stderr, + "Failed to parse subtest numbers.\n"); + return -EINVAL; + } + } + if (parse_num_list(arg, &env->test_selector)) { + fprintf(stderr, "Failed to parse test numbers.\n"); + return -EINVAL; + } + break; + } + case 
ARG_TEST_NAME: { + char *subtest_str = strchr(arg, '/'); + + if (subtest_str) { + *subtest_str = '\0'; + if (parse_str_list(subtest_str + 1, + &env->subtest_selector.whitelist)) + return -ENOMEM; + } + if (parse_str_list(arg, &env->test_selector.whitelist)) + return -ENOMEM; + break; + } + case ARG_TEST_NAME_BLACKLIST: { + char *subtest_str = strchr(arg, '/'); + + if (subtest_str) { + *subtest_str = '\0'; + if (parse_str_list(subtest_str + 1, + &env->subtest_selector.blacklist)) + return -ENOMEM; + } + if (parse_str_list(arg, &env->test_selector.blacklist)) + return -ENOMEM; + break; + } + case ARG_VERIFIER_STATS: + env->verifier_stats = true; + break; + case ARG_VERBOSE: + env->verbosity = VERBOSE_NORMAL; + if (arg) { + if (strcmp(arg, "v") == 0) { + env->verbosity = VERBOSE_VERY; + extra_prog_load_log_flags = 1; + } else if (strcmp(arg, "vv") == 0) { + env->verbosity = VERBOSE_SUPER; + extra_prog_load_log_flags = 2; + } else { + fprintf(stderr, + "Unrecognized verbosity setting ('%s'), only -v and -vv are supported\n", + arg); + return -EINVAL; + } + } + break; + case ARGP_KEY_ARG: + argp_usage(state); + break; + case ARGP_KEY_END: + break; + default: + return ARGP_ERR_UNKNOWN; + } + return 0; +} + +static void stdio_hijack(void) +{ +#ifdef __GLIBC__ + env.stdout = stdout; + env.stderr = stderr; + + if (env.verbosity > VERBOSE_NONE) { + /* nothing to do, output to stdout by default */ + return; + } + + /* stdout and stderr -> buffer */ + fflush(stdout); + + stdout = open_memstream(&env.log_buf, &env.log_cnt); + if (!stdout) { + stdout = env.stdout; + perror("open_memstream"); + return; + } + + stderr = stdout; +#endif +} + +static void stdio_restore(void) +{ +#ifdef __GLIBC__ + if (stdout == env.stdout) + return; + + fclose(stdout); + free(env.log_buf); + + env.log_buf = NULL; + env.log_cnt = 0; + + stdout = env.stdout; + stderr = env.stderr; +#endif +} + +/* + * Determine if test_progs is running as a "flavored" test runner and switch + * into corresponding 
sub-directory to load correct BPF objects. + * + * This is done by looking at executable name. If it contains "-flavor" + * suffix, then we are running as a flavored test runner. + */ +int cd_flavor_subdir(const char *exec_name) +{ + /* General form of argv[0] passed here is: + * some/path/to/test_progs[-flavor], where -flavor part is optional. + * First cut out "test_progs[-flavor]" part, then extract "flavor" + * part, if it's there. + */ + const char *flavor = strrchr(exec_name, '/'); + + if (!flavor) + return 0; + flavor++; + flavor = strrchr(flavor, '-'); + if (!flavor) + return 0; + flavor++; + printf("Switching to flavor '%s' subdirectory...\n", flavor); + return chdir(flavor); +} + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + int err, i; + + err = argp_parse(&argp, argc, argv, 0, NULL, &env); + if (err) + return err; + + err = cd_flavor_subdir(argv[0]); + if (err) + return err; + + libbpf_set_print(libbpf_print_fn); + srand(time(NULL)); - jit_enabled = is_jit_enabled(); + env.jit_enabled = is_jit_enabled(); - if (ac == 2 && strcmp(av[1], "-s") == 0) - verifier_stats = true; + stdio_hijack(); + for (i = 0; i < prog_test_cnt; i++) { + struct prog_test_def *test = &prog_test_defs[i]; -#define CALL -#include <prog_tests/tests.h> -#undef CALL + env.test = test; + test->test_num = i + 1; + + if (!should_run(&env.test_selector, + test->test_num, test->test_name)) + continue; + + test->run_test(); + /* ensure last sub-test is finalized properly */ + if (test->subtest_name) + test__end_subtest(); + + test->tested = true; + if (test->error_cnt) + env.fail_cnt++; + else + env.succ_cnt++; + skip_account(); + + dump_test_log(test, test->error_cnt); + + fprintf(env.stdout, "#%d %s:%s\n", + test->test_num, test->test_name, + test->error_cnt ? 
"FAIL" : "OK"); + + if (test->need_cgroup_cleanup) + cleanup_cgroup_environment(); + } + stdio_restore(); + printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", + env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); + + free(env.test_selector.blacklist.strs); + free(env.test_selector.whitelist.strs); + free(env.test_selector.num_set); + free(env.subtest_selector.blacklist.strs); + free(env.subtest_selector.whitelist.strs); + free(env.subtest_selector.num_set); - printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt); - return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS; + return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 49e0f7d85643..bcfa9ef23fda 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -16,9 +16,10 @@ typedef __u16 __sum16; #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> -#include <linux/tcp.h> +#include <netinet/tcp.h> #include <linux/filter.h> #include <linux/perf_event.h> +#include <linux/socket.h> #include <linux/unistd.h> #include <sys/ioctl.h> @@ -34,13 +35,56 @@ typedef __u16 __sum16; #include "test_iptunnel_common.h" #include "bpf_util.h" -#include "bpf_endian.h" +#include <bpf/bpf_endian.h> #include "trace_helpers.h" #include "flow_dissector_load.h" -extern int error_cnt, pass_cnt; -extern bool jit_enabled; -extern bool verifier_stats; +enum verbosity { + VERBOSE_NONE, + VERBOSE_NORMAL, + VERBOSE_VERY, + VERBOSE_SUPER, +}; + +struct str_set { + const char **strs; + int cnt; +}; + +struct test_selector { + struct str_set whitelist; + struct str_set blacklist; + bool *num_set; + int num_set_len; +}; + +struct test_env { + struct test_selector test_selector; + struct test_selector subtest_selector; + bool verifier_stats; + enum verbosity verbosity; + + bool jit_enabled; + + struct prog_test_def *test; + FILE *stdout; + FILE *stderr; + char *log_buf; 
+ size_t log_cnt; + + int succ_cnt; /* successful tests */ + int sub_succ_cnt; /* successful sub-tests */ + int fail_cnt; /* total failed tests + sub-tests */ + int skip_cnt; /* skipped tests */ +}; + +extern struct test_env env; + +extern void test__force_log(); +extern bool test__start_subtest(const char *name); +extern void test__skip(void); +extern void test__fail(void); +extern int test__join_cgroup(const char *path); #define MAGIC_BYTES 123 @@ -62,14 +106,27 @@ extern struct ipv6_packet pkt_v6; #define _CHECK(condition, tag, duration, format...) ({ \ int __ret = !!(condition); \ + int __save_errno = errno; \ if (__ret) { \ - error_cnt++; \ + test__fail(); \ printf("%s:FAIL:%s ", __func__, tag); \ printf(format); \ } else { \ - pass_cnt++; \ - printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\ + printf("%s:PASS:%s %d nsec\n", \ + __func__, tag, duration); \ + } \ + errno = __save_errno; \ + __ret; \ +}) + +#define CHECK_FAIL(condition) ({ \ + int __ret = !!(condition); \ + int __save_errno = errno; \ + if (__ret) { \ + test__fail(); \ + printf("%s:FAIL:%d\n", __func__, __LINE__); \ } \ + errno = __save_errno; \ __ret; \ }) diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c index 9220747c069d..356351c0ac28 100644 --- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c +++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c @@ -120,7 +120,7 @@ int check_ancestor_cgroup_ids(int prog_id) int err = 0; int map_fd; - expected_ids[0] = 0x100000001; /* root cgroup */ + expected_ids[0] = get_cgroup_id("/.."); /* root cgroup */ expected_ids[1] = get_cgroup_id(""); expected_ids[2] = get_cgroup_id(CGROUP_PATH); expected_ids[3] = 0; /* non-existent cgroup */ diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index fb679ac3d4b0..52bf14955797 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ 
-13,6 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" +#include <bpf/bpf_endian.h> #include "bpf_rlimit.h" #include "bpf_util.h" @@ -232,7 +233,8 @@ static struct sock_test tests[] = { /* if (ip == expected && port == expected) */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_ip6[3])), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x01000000, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x00000001), 4), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_port)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2), @@ -261,7 +263,8 @@ static struct sock_test tests[] = { /* if (ip == expected && port == expected) */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_ip4)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x0100007F, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, + __bpf_constant_ntohl(0x7F000001), 4), BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, offsetof(struct bpf_sock, src_port)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2), diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 3845144e2c91..779e11da979c 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -240,14 +240,14 @@ static int sockmap_init_sockets(int verbose) addr.sin_port = htons(S1_PORT); err = bind(s1, (struct sockaddr *)&addr, sizeof(addr)); if (err < 0) { - perror("bind s1 failed()\n"); + perror("bind s1 failed()"); return errno; } addr.sin_port = htons(S2_PORT); err = bind(s2, (struct sockaddr *)&addr, sizeof(addr)); if (err < 0) { - perror("bind s2 failed()\n"); + perror("bind s2 failed()"); return errno; } @@ -255,14 +255,14 @@ static int sockmap_init_sockets(int verbose) addr.sin_port = htons(S1_PORT); err = listen(s1, 32); if (err < 0) { - perror("listen s1 failed()\n"); + perror("listen s1 failed()"); return errno; } addr.sin_port = htons(S2_PORT); err = listen(s2, 32); if (err < 0) { - perror("listen s1 failed()\n"); + 
perror("listen s1 failed()"); return errno; } @@ -270,14 +270,14 @@ static int sockmap_init_sockets(int verbose) addr.sin_port = htons(S1_PORT); err = connect(c1, (struct sockaddr *)&addr, sizeof(addr)); if (err < 0 && errno != EINPROGRESS) { - perror("connect c1 failed()\n"); + perror("connect c1 failed()"); return errno; } addr.sin_port = htons(S2_PORT); err = connect(c2, (struct sockaddr *)&addr, sizeof(addr)); if (err < 0 && errno != EINPROGRESS) { - perror("connect c2 failed()\n"); + perror("connect c2 failed()"); return errno; } else if (err < 0) { err = 0; @@ -286,13 +286,13 @@ static int sockmap_init_sockets(int verbose) /* Accept Connecrtions */ p1 = accept(s1, NULL, NULL); if (p1 < 0) { - perror("accept s1 failed()\n"); + perror("accept s1 failed()"); return errno; } p2 = accept(s2, NULL, NULL); if (p2 < 0) { - perror("accept s1 failed()\n"); + perror("accept s1 failed()"); return errno; } @@ -331,25 +331,29 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, FILE *file; int i, fp; - file = fopen(".sendpage_tst.tmp", "w+"); + file = tmpfile(); + if (!file) { + perror("create file for sendpage"); + return 1; + } for (i = 0; i < iov_length * cnt; i++, k++) fwrite(&k, sizeof(char), 1, file); fflush(file); fseek(file, 0, SEEK_SET); - fclose(file); - fp = open(".sendpage_tst.tmp", O_RDONLY); + fp = fileno(file); + clock_gettime(CLOCK_MONOTONIC, &s->start); for (i = 0; i < cnt; i++) { int sent = sendfile(fd, fp, NULL, iov_length); if (!drop && sent < 0) { - perror("send loop error:"); - close(fp); + perror("send loop error"); + fclose(file); return sent; } else if (drop && sent >= 0) { printf("sendpage loop error expected: %i\n", sent); - close(fp); + fclose(file); return -EIO; } @@ -357,7 +361,7 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, s->bytes_sent += sent; } clock_gettime(CLOCK_MONOTONIC, &s->end); - close(fp); + fclose(file); return 0; } @@ -463,7 +467,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int 
cnt, int sent = sendmsg(fd, &msg, flags); if (!drop && sent < 0) { - perror("send loop error:"); + perror("send loop error"); goto out_errno; } else if (drop && sent >= 0) { printf("send loop error expected: %i\n", sent); @@ -499,7 +503,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, total_bytes -= txmsg_pop_total; err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) - perror("recv start time: "); + perror("recv start time"); while (s->bytes_recvd < total_bytes) { if (txmsg_cork) { timeout.tv_sec = 0; @@ -543,7 +547,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, if (recv < 0) { if (errno != EWOULDBLOCK) { clock_gettime(CLOCK_MONOTONIC, &s->end); - perror("recv failed()\n"); + perror("recv failed()"); goto out_errno; } } @@ -557,7 +561,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, errno = msg_verify_data(&msg, recv, chunk_sz); if (errno) { - perror("data verify msg failed\n"); + perror("data verify msg failed"); goto out_errno; } if (recvp) { @@ -565,7 +569,7 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, recvp, chunk_sz); if (errno) { - perror("data verify msg_peek failed\n"); + perror("data verify msg_peek failed"); goto out_errno; } } @@ -654,7 +658,7 @@ static int sendmsg_test(struct sockmap_options *opt) err = 0; exit(err ? 1 : 0); } else if (rxpid == -1) { - perror("msg_loop_rx: "); + perror("msg_loop_rx"); return errno; } @@ -681,7 +685,7 @@ static int sendmsg_test(struct sockmap_options *opt) s.bytes_recvd, recvd_Bps, recvd_Bps/giga); exit(err ? 
1 : 0); } else if (txpid == -1) { - perror("msg_loop_tx: "); + perror("msg_loop_tx"); return errno; } @@ -715,7 +719,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt) /* Ping/Pong data from client to server */ sc = send(c1, buf, sizeof(buf), 0); if (sc < 0) { - perror("send failed()\n"); + perror("send failed()"); return sc; } @@ -748,7 +752,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt) rc = recv(i, buf, sizeof(buf), 0); if (rc < 0) { if (errno != EWOULDBLOCK) { - perror("recv failed()\n"); + perror("recv failed()"); return rc; } } @@ -760,7 +764,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt) sc = send(i, buf, rc, 0); if (sc < 0) { - perror("send failed()\n"); + perror("send failed()"); return sc; } } diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index d008b41b7d8d..9b4d3a68a91a 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -12,8 +12,8 @@ #include <linux/tcp.h> #include <linux/pkt_cls.h> #include <sys/socket.h> -#include "bpf_helpers.h" -#include "bpf_endian.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> /* Sockmap sample program connects a client and a backend together * using cgroups. 
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c index 84e81a89e2f9..47e132726203 100644 --- a/tools/testing/selftests/bpf/test_stub.c +++ b/tools/testing/selftests/bpf/test_stub.c @@ -5,6 +5,8 @@ #include <bpf/libbpf.h> #include <string.h> +int extra_prog_load_log_flags = 0; + int bpf_prog_test_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { @@ -15,6 +17,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, attr.prog_type = type; attr.expected_attach_type = 0; attr.prog_flags = BPF_F_TEST_RND_HI32; + attr.log_level = extra_prog_load_log_flags; return bpf_prog_load_xattr(&attr, pobj, prog_fd); } @@ -35,6 +38,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_flags = BPF_F_TEST_RND_HI32; + load_attr.log_level = extra_prog_load_log_flags; return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); } diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c index a3bebd7c68dd..d196e2a4a6e0 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/test_sysctl.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include <bpf/bpf_endian.h> #include "bpf_rlimit.h" #include "bpf_util.h" #include "cgroup_helpers.h" @@ -31,6 +32,7 @@ struct sysctl_test { enum bpf_attach_type attach_type; const char *sysctl; int open_flags; + int seek; const char *newval; const char *oldval; enum { @@ -100,7 +102,7 @@ static struct sysctl_test tests[] = { .descr = "ctx:write sysctl:write read ok", .insns = { /* If (write) */ - BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, write)), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 1, 2), @@ -119,6 +121,29 @@ static struct sysctl_test tests[] = { .result = OP_EPERM, }, { + .descr = 
"ctx:write sysctl:write read ok narrow", + .insns = { + /* u64 w = (u16)write & 1; */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write)), +#else + BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, write) + 2), +#endif + BPF_ALU64_IMM(BPF_AND, BPF_REG_7, 1), + /* return 1 - w; */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SYSCTL, + .sysctl = "kernel/domainname", + .open_flags = O_WRONLY, + .newval = "(none)", /* same as default, should fail anyway */ + .result = OP_EPERM, + }, + { .descr = "ctx:write sysctl:read write reject", .insns = { /* write = X */ @@ -139,7 +164,7 @@ static struct sysctl_test tests[] = { /* If (file_pos == X) */ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 3, 2), /* return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), @@ -152,15 +177,21 @@ static struct sysctl_test tests[] = { .attach_type = BPF_CGROUP_SYSCTL, .sysctl = "kernel/ostype", .open_flags = O_RDONLY, + .seek = 3, .result = SUCCESS, }, { .descr = "ctx:file_pos sysctl:read read ok narrow", .insns = { /* If (file_pos == X) */ +#if __BYTE_ORDER == __LITTLE_ENDIAN BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, offsetof(struct bpf_sysctl, file_pos)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2), +#else + BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sysctl, file_pos) + 3), +#endif + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 4, 2), /* return ALLOW; */ BPF_MOV64_IMM(BPF_REG_0, 1), @@ -173,6 +204,7 @@ static struct sysctl_test tests[] = { .attach_type = BPF_CGROUP_SYSCTL, .sysctl = "kernel/ostype", .open_flags = O_RDONLY, + .seek = 4, .result = SUCCESS, }, { @@ -214,7 +246,8 @@ static struct sysctl_test tests[] = { /* if (ret == expected && */ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, sizeof("tcp_mem") - 1, 6), /* buf == 
"tcp_mem\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x006d656d5f706374ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d656d00ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -255,7 +288,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), /* buf[0:7] == "tcp_me\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x00656d5f706374ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x7463705f6d650000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -298,12 +332,14 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 16, 14), /* buf[0:8] == "net/ipv4" && */ - BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), /* buf[8:16] == "/tcp_mem" && */ - BPF_LD_IMM64(BPF_REG_8, 0x6d656d5f7063742fULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d656dULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), @@ -350,12 +386,14 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 10), /* buf[0:8] == "net/ipv4" && */ - BPF_LD_IMM64(BPF_REG_8, 0x347670692f74656eULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x6e65742f69707634ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), /* buf[8:16] == "/tcp_me\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x00656d5f7063742fULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x2f7463705f6d6500ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -396,7 +434,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), /* buf[0:8] == "net/ip\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000070692f74656eULL), + BPF_LD_IMM64(BPF_REG_8, + 
bpf_be64_to_cpu(0x6e65742f69700000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -431,7 +470,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), /* buf[0:6] == "Linux\n\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -469,7 +509,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 6, 6), /* buf[0:6] == "Linux\n\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000a78756e694cULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e75780a0000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -507,7 +548,8 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, -E2BIG, 6), /* buf[0:6] == "Linux\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x000078756e694cULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x4c696e7578000000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -650,7 +692,8 @@ static struct sysctl_test tests[] = { /* buf[0:4] == "606\0") */ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x00363036, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36303600), 2), /* return DENY; */ BPF_MOV64_IMM(BPF_REG_0, 0), @@ -685,17 +728,20 @@ static struct sysctl_test tests[] = { BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 23, 14), /* buf[0:8] == "3000000 " && */ - BPF_LD_IMM64(BPF_REG_8, 0x2030303030303033ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3330303030303020ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 0), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 10), /* buf[8:16] == "4000000 " && */ - BPF_LD_IMM64(BPF_REG_8, 0x2030303030303034ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3430303030303020ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 8), 
BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 6), /* buf[16:24] == "6000000\0") */ - BPF_LD_IMM64(BPF_REG_8, 0x0030303030303036ULL), + BPF_LD_IMM64(BPF_REG_8, + bpf_be64_to_cpu(0x3630303030303000ULL)), BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_7, 16), BPF_JMP_REG(BPF_JNE, BPF_REG_8, BPF_REG_9, 2), @@ -735,7 +781,8 @@ static struct sysctl_test tests[] = { /* buf[0:3] == "60\0") */ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_7, 0), - BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0x003036, 2), + BPF_JMP_IMM(BPF_JNE, BPF_REG_9, + bpf_ntohl(0x36300000), 2), /* return DENY; */ BPF_MOV64_IMM(BPF_REG_0, 0), @@ -757,7 +804,8 @@ static struct sysctl_test tests[] = { /* sysctl_set_new_value arg2 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), @@ -791,7 +839,7 @@ static struct sysctl_test tests[] = { /* sysctl_set_new_value arg2 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, FIXUP_SYSCTL_VALUE), + BPF_LD_IMM64(BPF_REG_0, FIXUP_SYSCTL_VALUE), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_7), @@ -825,8 +873,9 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -869,7 +918,8 @@ static struct sysctl_test tests[] = { BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), /* "600 602\0" */ - BPF_LD_IMM64(BPF_REG_0, 0x0032303620303036ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3630302036303200ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), 
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -937,7 +987,8 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -969,8 +1020,9 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00373730), - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30373700)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1012,7 +1064,8 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00303036), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x36303000)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1052,7 +1105,8 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x090a0c0d), + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0d0c0a09)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1092,7 +1146,9 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */ + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1132,8 +1188,10 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x00362d0a), /* " -6\0" */ - BPF_STX_MEM(BPF_DW, 
BPF_REG_7, BPF_REG_0, 0), + /* " -6\0" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x0a2d3600)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1175,8 +1233,10 @@ static struct sysctl_test tests[] = { /* arg1 (buf) */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8), - BPF_MOV64_IMM(BPF_REG_0, 0x65667830), /* "0xfe" */ - BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + /* "0xfe" */ + BPF_MOV64_IMM(BPF_REG_0, + bpf_ntohl(0x30786665)), + BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1218,11 +1278,14 @@ static struct sysctl_test tests[] = { /* arg1 (buf) 9223372036854775807 */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_LD_IMM64(BPF_REG_0, 0x0000000000373038ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830370000000000ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), @@ -1266,11 +1329,14 @@ static struct sysctl_test tests[] = { /* arg1 (buf) 9223372036854775808 */ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -24), - BPF_LD_IMM64(BPF_REG_0, 0x3032373333323239ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3932323333373230ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), - BPF_LD_IMM64(BPF_REG_0, 0x3537373435383633ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3336383534373735ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 8), - BPF_LD_IMM64(BPF_REG_0, 0x0000000000383038ULL), + BPF_LD_IMM64(BPF_REG_0, + bpf_be64_to_cpu(0x3830380000000000ULL)), BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 16), BPF_MOV64_REG(BPF_REG_1, 
BPF_REG_7), @@ -1344,20 +1410,23 @@ static size_t probe_prog_length(const struct bpf_insn *fp) static int fixup_sysctl_value(const char *buf, size_t buf_len, struct bpf_insn *prog, size_t insn_num) { - uint32_t value_num = 0; - uint8_t c, i; + union { + uint8_t raw[sizeof(uint64_t)]; + uint64_t num; + } value = {}; - if (buf_len > sizeof(value_num)) { + if (buf_len > sizeof(value)) { log_err("Value is too big (%zd) to use in fixup", buf_len); return -1; } - - for (i = 0; i < buf_len; ++i) { - c = buf[i]; - value_num |= (c << i * 8); + if (prog[insn_num].code != (BPF_LD | BPF_DW | BPF_IMM)) { + log_err("Can fixup only BPF_LD_IMM64 insns"); + return -1; } - prog[insn_num].imm = value_num; + memcpy(value.raw, buf, buf_len); + prog[insn_num].imm = (uint32_t)value.num; + prog[insn_num + 1].imm = (uint32_t)(value.num >> 32); return 0; } @@ -1442,6 +1511,11 @@ static int access_sysctl(const char *sysctl_path, if (fd < 0) return fd; + if (test->seek && lseek(fd, test->seek, SEEK_SET) == -1) { + log_err("lseek(%d) failed", test->seek); + goto err; + } + if (test->open_flags == O_RDONLY) { char buf[128]; @@ -1499,6 +1573,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) goto err; } + errno = 0; if (access_sysctl(sysctl_path, test) == -1) { if (test->result == OP_EPERM && errno == EPERM) goto out; @@ -1507,7 +1582,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test) } if (test->result != SUCCESS) { - log_err("Unexpected failure"); + log_err("Unexpected success"); goto err; } diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh index f38567ef694b..daa7d1b8d309 100755 --- a/tools/testing/selftests/bpf/test_tc_edt.sh +++ b/tools/testing/selftests/bpf/test_tc_edt.sh @@ -59,7 +59,7 @@ ip netns exec ${NS_SRC} tc filter add dev veth_src egress \ # start the listener ip netns exec ${NS_DST} bash -c \ - "nc -4 -l -s ${IP_DST} -p 9000 >/dev/null &" + "nc -4 -l -p 9000 >/dev/null &" declare -i NC_PID=$! 
sleep 1 diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index ff0d31d38061..7c76b841b17b 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -62,6 +62,10 @@ cleanup() { if [[ -f "${infile}" ]]; then rm "${infile}" fi + + if [[ -n $server_pid ]]; then + kill $server_pid 2> /dev/null + fi } server_listen() { @@ -77,6 +81,7 @@ client_connect() { verify_data() { wait "${server_pid}" + server_pid= # sha1sum returns two fields [sha1] [filepath] # convert to bash array and access first elem insum=($(sha1sum ${infile})) diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh index d48e51716d19..9b3617d770a5 100755 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh @@ -37,6 +37,9 @@ setup() ns1_exec ip link set lo up ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 + ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0 + ns1_exec sysctl -w net.ipv4.tcp_timestamps=0 + ns1_exec sysctl -w net.ipv4.tcp_sack=0 wait_for_ip 127.0.0.1 wait_for_ip ::1 diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c index 87829c86c746..b9e991d43155 100644 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c @@ -2,6 +2,7 @@ // Copyright (c) 2018 Facebook // Copyright (c) 2019 Cloudflare +#include <limits.h> #include <string.h> #include <stdlib.h> #include <unistd.h> @@ -77,7 +78,7 @@ out: return fd; } -static int get_map_fd_by_prog_id(int prog_id) +static int get_map_fd_by_prog_id(int prog_id, bool *xdp) { struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); @@ -104,6 +105,8 @@ static int get_map_fd_by_prog_id(int prog_id) goto err; } + *xdp = info.type == BPF_PROG_TYPE_XDP; + 
map_fd = bpf_map_get_fd_by_id(map_ids[0]); if (map_fd < 0) log_err("Failed to get fd by map id %d", map_ids[0]); @@ -113,18 +116,32 @@ err: return map_fd; } -static int run_test(int server_fd, int results_fd) +static int run_test(int server_fd, int results_fd, bool xdp) { int client = -1, srv_client = -1; int ret = 0; __u32 key = 0; - __u64 value = 0; + __u32 key_gen = 1; + __u32 key_mss = 2; + __u32 value = 0; + __u32 value_gen = 0; + __u32 value_mss = 0; if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) { log_err("Can't clear results"); goto err; } + if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) { + log_err("Can't clear results"); + goto err; + } + + if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) { + log_err("Can't clear results"); + goto err; + } + client = connect_to_server(server_fd); if (client == -1) goto err; @@ -140,8 +157,35 @@ static int run_test(int server_fd, int results_fd) goto err; } - if (value != 1) { - log_err("Didn't match syncookie: %llu", value); + if (value == 0) { + log_err("Didn't match syncookie: %u", value); + goto err; + } + + if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) { + log_err("Can't lookup result"); + goto err; + } + + if (xdp && value_gen == 0) { + // SYN packets do not get passed through generic XDP, skip the + // rest of the test. 
+ printf("Skipping XDP cookie check\n"); + goto out; + } + + if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) { + log_err("Can't lookup result"); + goto err; + } + + if (value != value_gen) { + log_err("BPF generated cookie does not match kernel one"); + goto err; + } + + if (value_mss < 536 || value_mss > USHRT_MAX) { + log_err("Unexpected MSS retrieved"); goto err; } @@ -163,13 +207,14 @@ int main(int argc, char **argv) int server_v6 = -1; int results = -1; int err = 0; + bool xdp; if (argc < 2) { fprintf(stderr, "Usage: %s prog_id\n", argv[0]); exit(1); } - results = get_map_fd_by_prog_id(atoi(argv[1])); + results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp); if (results < 0) { log_err("Can't get map"); goto err; @@ -194,10 +239,10 @@ int main(int argc, char **argv) if (server_v6 == -1) goto err; - if (run_test(server, results)) + if (run_test(server, results, xdp)) goto err; - if (run_test(server_v6, results)) + if (run_test(server_v6, results, xdp)) goto err; printf("ok\n"); diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h index 7bcfa6207005..6220b95cbd02 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf.h +++ b/tools/testing/selftests/bpf/test_tcpbpf.h @@ -13,5 +13,6 @@ struct tcpbpf_globals { __u64 bytes_received; __u64 bytes_acked; __u32 num_listen; + __u32 num_close_events; }; #endif diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c index 716b4e3be581..3ae127620463 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_user.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c @@ -16,6 +16,9 @@ #include "test_tcpbpf.h" +/* 3 comes from one listening socket + both ends of the connection */ +#define EXPECTED_CLOSE_EVENTS 3 + #define EXPECT_EQ(expected, actual, fmt) \ do { \ if ((expected) != (actual)) { \ @@ -23,13 +26,14 @@ " Actual: %" fmt "\n" \ " Expected: %" fmt "\n", \ (actual), (expected)); \ - goto err; \ + ret--; \ } \ } while 
(0) int verify_result(const struct tcpbpf_globals *result) { __u32 expected_events; + int ret = 0; expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) | (1 << BPF_SOCK_OPS_RWND_INIT) | @@ -48,15 +52,15 @@ int verify_result(const struct tcpbpf_globals *result) EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32); EXPECT_EQ(0, result->good_cb_test_rv, PRIu32); EXPECT_EQ(1, result->num_listen, PRIu32); + EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32); - return 0; -err: - return -1; + return ret; } int verify_sockopt_result(int sock_map_fd) { __u32 key = 0; + int ret = 0; int res; int rv; @@ -69,9 +73,7 @@ int verify_sockopt_result(int sock_map_fd) rv = bpf_map_lookup_elem(sock_map_fd, &key, &res); EXPECT_EQ(0, rv, "d"); EXPECT_EQ(1, res, "d"); - return 0; -err: - return -1; + return ret; } static int bpf_find_map(const char *test, struct bpf_object *obj, @@ -96,6 +98,7 @@ int main(int argc, char **argv) int error = EXIT_FAILURE; struct bpf_object *obj; int cg_fd = -1; + int retry = 10; __u32 key = 0; int rv; @@ -134,12 +137,20 @@ int main(int argc, char **argv) if (sock_map_fd < 0) goto err; +retry_lookup: rv = bpf_map_lookup_elem(map_fd, &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); goto err; } + if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) { + printf("Unexpected number of close events (%d), retrying!\n", + g.num_close_events); + usleep(100); + goto retry_lookup; + } + if (verify_result(&g)) { printf("FAILED: Wrong stats\n"); goto err; diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 86152d9ae95b..f9765ddf0761 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -17,6 +17,7 @@ #include <linux/rtnetlink.h> #include <signal.h> #include <linux/perf_event.h> +#include <linux/err.h> #include "bpf_rlimit.h" #include "bpf_util.h" @@ -30,28 +31,34 @@ pthread_t tid; int 
rx_callbacks; -static int dummyfn(void *data, int size) +static void dummyfn(void *ctx, int cpu, void *data, __u32 size) { struct tcp_notifier *t = data; if (t->type != 0xde || t->subtype != 0xad || t->source != 0xbe || t->hash != 0xef) - return 1; + return; rx_callbacks++; - return 0; } -void tcp_notifier_poller(int fd) +void tcp_notifier_poller(struct perf_buffer *pb) { - while (1) - perf_event_poller(fd, dummyfn); + int err; + + while (1) { + err = perf_buffer__poll(pb, 100); + if (err < 0 && err != -EINTR) { + printf("failed perf_buffer__poll: %d\n", err); + return; + } + } } static void *poller_thread(void *arg) { - int fd = *(int *)arg; + struct perf_buffer *pb = arg; - tcp_notifier_poller(fd); + tcp_notifier_poller(pb); return arg; } @@ -60,52 +67,20 @@ int verify_result(const struct tcpnotify_globals *result) return (result->ncalls > 0 && result->ncalls == rx_callbacks ? 0 : 1); } -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -static int setup_bpf_perf_event(int mapfd) -{ - struct perf_event_attr attr = { - .sample_type = PERF_SAMPLE_RAW, - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - }; - int key = 0; - int pmu_fd; - - pmu_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); - if (pmu_fd < 0) - return pmu_fd; - bpf_map_update_elem(mapfd, &key, &pmu_fd, BPF_ANY); - - ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0); - return pmu_fd; -} - int main(int argc, char **argv) { const char *file = "test_tcpnotify_kern.o"; - int prog_fd, map_fd, perf_event_fd; + struct bpf_map *perf_map, *global_map; + struct perf_buffer_opts pb_opts = {}; struct tcpnotify_globals g = {0}; + struct perf_buffer *pb = NULL; const char *cg_path = "/foo"; + int prog_fd, rv, cg_fd = -1; int error = EXIT_FAILURE; struct bpf_object *obj; - 
int cg_fd = -1; - __u32 key = 0; - int rv; char test_script[80]; - int pmu_fd; cpu_set_t cpuset; + __u32 key = 0; CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); @@ -133,19 +108,24 @@ int main(int argc, char **argv) goto err; } - perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); - if (perf_event_fd < 0) + perf_map = bpf_object__find_map_by_name(obj, "perf_event_map"); + if (!perf_map) { + printf("FAIL:map '%s' not found\n", "perf_event_map"); goto err; + } - map_fd = bpf_find_map(__func__, obj, "global_map"); - if (map_fd < 0) - goto err; + global_map = bpf_object__find_map_by_name(obj, "global_map"); + if (!global_map) { + printf("FAIL:map '%s' not found\n", "global_map"); + return -1; + } - pmu_fd = setup_bpf_perf_event(perf_event_fd); - if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) + pb_opts.sample_cb = dummyfn; + pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts); + if (IS_ERR(pb)) goto err; - pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); + pthread_create(&tid, NULL, poller_thread, pb); sprintf(test_script, "iptables -A INPUT -p tcp --dport %d -j DROP", @@ -162,7 +142,7 @@ int main(int argc, char **argv) TESTPORT); system(test_script); - rv = bpf_map_lookup_elem(map_fd, &key, &g); + rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); goto err; @@ -182,5 +162,7 @@ err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); close(cg_fd); cleanup_cgroup_environment(); + if (!IS_ERR_OR_NULL(pb)) + perf_buffer__free(pb); return error; } diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 84135d5f4b35..87eaa49609a0 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -50,7 +50,7 @@ #define MAX_INSNS BPF_MAXINSNS #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 -#define MAX_NR_MAPS 18 +#define MAX_NR_MAPS 19 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 
0xcafe4all #define TEST_DATA_LEN 64 @@ -61,6 +61,7 @@ #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" static bool unpriv_disabled = false; static int skips; +static bool verbose = false; struct bpf_test { const char *descr; @@ -84,6 +85,7 @@ struct bpf_test { int fixup_map_array_wo[MAX_FIXUPS]; int fixup_map_array_small[MAX_FIXUPS]; int fixup_sk_storage_map[MAX_FIXUPS]; + int fixup_map_event_output[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; uint32_t insn_processed; @@ -91,7 +93,8 @@ struct bpf_test { enum { UNDEF, ACCEPT, - REJECT + REJECT, + VERBOSE_ACCEPT, } result, result_unpriv; enum bpf_prog_type prog_type; uint8_t flags; @@ -405,10 +408,10 @@ static void update_map(int fd, int index) assert(!bpf_map_update_elem(fd, &index, &value, 0)); } -static int create_prog_dummy1(enum bpf_prog_type prog_type) +static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret) { struct bpf_insn prog[] = { - BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_MOV64_IMM(BPF_REG_0, ret), BPF_EXIT_INSN(), }; @@ -416,14 +419,15 @@ static int create_prog_dummy1(enum bpf_prog_type prog_type) ARRAY_SIZE(prog), "GPL", 0, NULL, 0); } -static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx) +static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd, + int idx, int ret) { struct bpf_insn prog[] = { BPF_MOV64_IMM(BPF_REG_3, idx), BPF_LD_MAP_FD(BPF_REG_2, mfd), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), - BPF_MOV64_IMM(BPF_REG_0, 41), + BPF_MOV64_IMM(BPF_REG_0, ret), BPF_EXIT_INSN(), }; @@ -432,10 +436,9 @@ static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx) } static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, - int p1key) + int p1key, int p2key, int p3key) { - int p2key = 1; - int mfd, p1fd, p2fd; + int mfd, p1fd, p2fd, p3fd; mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int), sizeof(int), max_elem, 0); @@ -446,23 +449,24 @@ static int 
create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem, return -1; } - p1fd = create_prog_dummy1(prog_type); - p2fd = create_prog_dummy2(prog_type, mfd, p2key); - if (p1fd < 0 || p2fd < 0) - goto out; + p1fd = create_prog_dummy_simple(prog_type, 42); + p2fd = create_prog_dummy_loop(prog_type, mfd, p2key, 41); + p3fd = create_prog_dummy_simple(prog_type, 24); + if (p1fd < 0 || p2fd < 0 || p3fd < 0) + goto err; if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0) - goto out; + goto err; if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0) - goto out; + goto err; + if (bpf_map_update_elem(mfd, &p3key, &p3fd, BPF_ANY) < 0) { +err: + close(mfd); + mfd = -1; + } + close(p3fd); close(p2fd); close(p1fd); - return mfd; -out: - close(p2fd); - close(p1fd); - close(mfd); - return -1; } static int create_map_in_map(void) @@ -632,6 +636,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_array_wo = test->fixup_map_array_wo; int *fixup_map_array_small = test->fixup_map_array_small; int *fixup_sk_storage_map = test->fixup_sk_storage_map; + int *fixup_map_event_output = test->fixup_map_event_output; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -680,7 +685,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, } if (*fixup_prog1) { - map_fds[4] = create_prog_array(prog_type, 4, 0); + map_fds[4] = create_prog_array(prog_type, 4, 0, 1, 2); do { prog[*fixup_prog1].imm = map_fds[4]; fixup_prog1++; @@ -688,7 +693,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, } if (*fixup_prog2) { - map_fds[5] = create_prog_array(prog_type, 8, 7); + map_fds[5] = create_prog_array(prog_type, 8, 7, 1, 2); do { prog[*fixup_prog2].imm = map_fds[5]; fixup_prog2++; @@ -793,6 +798,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, fixup_sk_storage_map++; } while (*fixup_sk_storage_map); } + if 
(*fixup_map_event_output) { + map_fds[18] = __create_map(BPF_MAP_TYPE_PERF_EVENT_ARRAY, + sizeof(int), sizeof(int), 1, 0); + do { + prog[*fixup_map_event_output].imm = map_fds[18]; + fixup_map_event_output++; + } while (*fixup_map_event_output); + } } static int set_admin(bool admin) @@ -849,6 +862,36 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val, return 0; } +static bool cmp_str_seq(const char *log, const char *exp) +{ + char needle[80]; + const char *p, *q; + int len; + + do { + p = strchr(exp, '\t'); + if (!p) + p = exp + strlen(exp); + + len = p - exp; + if (len >= sizeof(needle) || !len) { + printf("FAIL\nTestcase bug\n"); + return false; + } + strncpy(needle, exp, len); + needle[len] = 0; + q = strstr(log, needle); + if (!q) { + printf("FAIL\nUnexpected verifier log in successful load!\n" + "EXP: %s\nRES:\n", needle); + return false; + } + log = q + len; + exp = p + 1; + } while (*p); + return true; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { @@ -887,14 +930,20 @@ static void do_test_single(struct bpf_test *test, bool unpriv, pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) pflags |= BPF_F_ANY_ALIGNMENT; + if (test->flags & ~3) + pflags |= test->flags; + expected_ret = unpriv && test->result_unpriv != UNDEF ? + test->result_unpriv : test->result; + expected_err = unpriv && test->errstr_unpriv ? + test->errstr_unpriv : test->errstr; memset(&attr, 0, sizeof(attr)); attr.prog_type = prog_type; attr.expected_attach_type = test->expected_attach_type; attr.insns = prog; attr.insns_cnt = prog_len; attr.license = "GPL"; - attr.log_level = 4; + attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 
1 : 4; attr.prog_flags = pflags; fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); @@ -904,14 +953,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, goto close_fds; } - expected_ret = unpriv && test->result_unpriv != UNDEF ? - test->result_unpriv : test->result; - expected_err = unpriv && test->errstr_unpriv ? - test->errstr_unpriv : test->errstr; - alignment_prevented_execution = 0; - if (expected_ret == ACCEPT) { + if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) { if (fd_prog < 0) { printf("FAIL\nFailed to load prog '%s'!\n", strerror(errno)); @@ -922,6 +966,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) alignment_prevented_execution = 1; #endif + if (expected_ret == VERBOSE_ACCEPT && !cmp_str_seq(bpf_vlog, expected_err)) { + goto fail_log; + } } else { if (fd_prog >= 0) { printf("FAIL\nUnexpected success to load!\n"); @@ -947,6 +994,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, } } + if (verbose) + printf(", verifier log:\n%s", bpf_vlog); + run_errs = 0; run_successes = 0; if (!alignment_prevented_execution && fd_prog >= 0) { @@ -1087,17 +1137,24 @@ int main(int argc, char **argv) { unsigned int from = 0, to = ARRAY_SIZE(tests); bool unpriv = !is_admin(); + int arg = 1; + + if (argc > 1 && strcmp(argv[1], "-v") == 0) { + arg++; + verbose = true; + argc--; + } if (argc == 3) { - unsigned int l = atoi(argv[argc - 2]); - unsigned int u = atoi(argv[argc - 1]); + unsigned int l = atoi(argv[arg]); + unsigned int u = atoi(argv[arg + 1]); if (l < to && u < to) { from = l; to = u + 1; } } else if (argc == 2) { - unsigned int t = atoi(argv[argc - 1]); + unsigned int t = atoi(argv[arg]); if (t < to) { from = t; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index b47f205f0310..7f989b3e4e22 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ 
b/tools/testing/selftests/bpf/trace_helpers.c @@ -86,128 +86,3 @@ long ksym_get_addr(const char *name) return 0; } - -static int page_size; -static int page_cnt = 8; -static struct perf_event_mmap_page *header; - -int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header) -{ - void *base; - int mmap_size; - - page_size = getpagesize(); - mmap_size = page_size * (page_cnt + 1); - - base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - if (base == MAP_FAILED) { - printf("mmap err\n"); - return -1; - } - - *header = base; - return 0; -} - -int perf_event_mmap(int fd) -{ - return perf_event_mmap_header(fd, &header); -} - -static int perf_event_poll(int fd) -{ - struct pollfd pfd = { .fd = fd, .events = POLLIN }; - - return poll(&pfd, 1, 1000); -} - -struct perf_event_sample { - struct perf_event_header header; - __u32 size; - char data[]; -}; - -static enum bpf_perf_event_ret -bpf_perf_event_print(struct perf_event_header *hdr, void *private_data) -{ - struct perf_event_sample *e = (struct perf_event_sample *)hdr; - perf_event_print_fn fn = private_data; - int ret; - - if (e->header.type == PERF_RECORD_SAMPLE) { - ret = fn(e->data, e->size); - if (ret != LIBBPF_PERF_EVENT_CONT) - return ret; - } else if (e->header.type == PERF_RECORD_LOST) { - struct { - struct perf_event_header header; - __u64 id; - __u64 lost; - } *lost = (void *) e; - printf("lost %lld events\n", lost->lost); - } else { - printf("unknown event type=%d size=%d\n", - e->header.type, e->header.size); - } - - return LIBBPF_PERF_EVENT_CONT; -} - -int perf_event_poller(int fd, perf_event_print_fn output_fn) -{ - enum bpf_perf_event_ret ret; - void *buf = NULL; - size_t len = 0; - - for (;;) { - perf_event_poll(fd); - ret = bpf_perf_event_read_simple(header, page_cnt * page_size, - page_size, &buf, &len, - bpf_perf_event_print, - output_fn); - if (ret != LIBBPF_PERF_EVENT_CONT) - break; - } - free(buf); - - return ret; -} - -int perf_event_poller_multi(int *fds, struct 
perf_event_mmap_page **headers, - int num_fds, perf_event_print_fn output_fn) -{ - enum bpf_perf_event_ret ret; - struct pollfd *pfds; - void *buf = NULL; - size_t len = 0; - int i; - - pfds = calloc(num_fds, sizeof(*pfds)); - if (!pfds) - return LIBBPF_PERF_EVENT_ERROR; - - for (i = 0; i < num_fds; i++) { - pfds[i].fd = fds[i]; - pfds[i].events = POLLIN; - } - - for (;;) { - poll(pfds, num_fds, 1000); - for (i = 0; i < num_fds; i++) { - if (!pfds[i].revents) - continue; - - ret = bpf_perf_event_read_simple(headers[i], - page_cnt * page_size, - page_size, &buf, &len, - bpf_perf_event_print, - output_fn); - if (ret != LIBBPF_PERF_EVENT_CONT) - break; - } - } - free(buf); - free(pfds); - - return ret; -} diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 18924f23db1b..0383c9b8adc1 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -2,8 +2,7 @@ #ifndef __TRACE_HELPER_H #define __TRACE_HELPER_H -#include <libbpf.h> -#include <linux/perf_event.h> +#include <bpf/libbpf.h> struct ksym { long addr; @@ -14,12 +13,4 @@ int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); -typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size); - -int perf_event_mmap(int fd); -int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header); -/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */ -int perf_event_poller(int fd, perf_event_print_fn output_fn); -int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers, - int num_fds, perf_event_print_fn output_fn); #endif diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c new file mode 100644 index 000000000000..130553e19eca --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/event_output.c @@ -0,0 +1,94 @@ +/* instructions used to output a skb based 
software event, produced + * from code snippet: + * struct TMP { + * uint64_t tmp; + * } tt; + * tt.tmp = 5; + * bpf_perf_event_output(skb, &connection_tracking_event_map, 0, + * &tt, sizeof(tt)); + * return 1; + * + * the bpf assembly from llvm is: + * 0: b7 02 00 00 05 00 00 00 r2 = 5 + * 1: 7b 2a f8 ff 00 00 00 00 *(u64 *)(r10 - 8) = r2 + * 2: bf a4 00 00 00 00 00 00 r4 = r10 + * 3: 07 04 00 00 f8 ff ff ff r4 += -8 + * 4: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0ll + * 6: b7 03 00 00 00 00 00 00 r3 = 0 + * 7: b7 05 00 00 08 00 00 00 r5 = 8 + * 8: 85 00 00 00 19 00 00 00 call 25 + * 9: b7 00 00 00 01 00 00 00 r0 = 1 + * 10: 95 00 00 00 00 00 00 00 exit + * + * The reason I put the code here instead of fill_helpers is that map fixup + * is against the insns, instead of filled prog. + */ + +#define __PERF_EVENT_INSNS__ \ + BPF_MOV64_IMM(BPF_REG_2, 5), \ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), \ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), \ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), \ + BPF_LD_MAP_FD(BPF_REG_2, 0), \ + BPF_MOV64_IMM(BPF_REG_3, 0), \ + BPF_MOV64_IMM(BPF_REG_5, 8), \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, \ + BPF_FUNC_perf_event_output), \ + BPF_MOV64_IMM(BPF_REG_0, 1), \ + BPF_EXIT_INSN(), +{ + "perfevent for sockops", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for tc", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for lwt out", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_LWT_OUT, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for xdp", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for socket filter", + .insns = 
{ __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for sk_skb", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "perfevent for cgroup skb", + .insns = { __PERF_EVENT_INSNS__ }, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, + .fixup_map_event_output = { 4 }, + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index f0961c58581e..bf0322eb5346 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -744,3 +744,86 @@ .result = ACCEPT, .retval = 2, }, +{ + "jgt32: range bound deduction, reg op imm", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_cgroup_classid), + BPF_JMP32_IMM(BPF_JGT, BPF_REG_0, 1, 5), + BPF_MOV32_REG(BPF_REG_6, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 32), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_6), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "jgt32: range bound deduction, reg1 op reg2, reg1 unknown", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + 
BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_cgroup_classid), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_JMP32_REG(BPF_JGT, BPF_REG_0, BPF_REG_2, 5), + BPF_MOV32_REG(BPF_REG_6, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 32), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_6), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "jle32: range bound deduction, reg1 op reg2, reg2 unknown", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_cgroup_classid), + BPF_MOV32_IMM(BPF_REG_2, 1), + BPF_JMP32_REG(BPF_JLE, BPF_REG_2, BPF_REG_0, 5), + BPF_MOV32_REG(BPF_REG_6, BPF_REG_0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 32), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_6), + BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c index 1fc4e61e9f9f..1af37187dc12 100644 --- a/tools/testing/selftests/bpf/verifier/loops1.c +++ b/tools/testing/selftests/bpf/verifier/loops1.c @@ 
-187,3 +187,20 @@ .prog_type = BPF_PROG_TYPE_XDP, .retval = 55, }, +{ + "taken loop with back jump to 1st insn, 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 10), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, -3), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .retval = 55, +}, diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c new file mode 100644 index 000000000000..02151f8c940f --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -0,0 +1,194 @@ +{ + "precise: test 1", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1), + BPF_EXIT_INSN(), + + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_array_48b = { 1 }, + .result = VERBOSE_ACCEPT, + .errstr = + "26: 
(85) call bpf_probe_read#4\ + last_idx 26 first_idx 20\ + regs=4 stack=0 before 25\ + regs=4 stack=0 before 24\ + regs=4 stack=0 before 23\ + regs=4 stack=0 before 22\ + regs=4 stack=0 before 20\ + parent didn't have regs=4 stack=0 marks\ + last_idx 19 first_idx 10\ + regs=4 stack=0 before 19\ + regs=200 stack=0 before 18\ + regs=300 stack=0 before 17\ + regs=201 stack=0 before 15\ + regs=201 stack=0 before 14\ + regs=200 stack=0 before 13\ + regs=200 stack=0 before 12\ + regs=200 stack=0 before 11\ + regs=200 stack=0 before 10\ + parent already had regs=0 stack=0 marks", +}, +{ + "precise: test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_LD_MAP_FD(BPF_REG_6, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + + BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8), /* map_value_ptr -= map_value_ptr */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_9), + BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1), + BPF_EXIT_INSN(), + + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_probe_read), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_array_48b = { 1 }, + .result = VERBOSE_ACCEPT, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = + "26: (85) call bpf_probe_read#4\ + last_idx 26 first_idx 22\ + regs=4 stack=0 before 25\ + regs=4 stack=0 before 24\ + regs=4 stack=0 before 23\ + regs=4 stack=0 
before 22\ + parent didn't have regs=4 stack=0 marks\ + last_idx 20 first_idx 20\ + regs=4 stack=0 before 20\ + parent didn't have regs=4 stack=0 marks\ + last_idx 19 first_idx 17\ + regs=4 stack=0 before 19\ + regs=200 stack=0 before 18\ + regs=300 stack=0 before 17\ + parent already had regs=0 stack=0 marks", +}, +{ + "precise: cross frame pruning", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "!read_ok", + .result = REJECT, +}, +{ + "precise: ST insn causing spi > allocated_stack", + .insns = { + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "5: (2d) if r4 > r0 goto pc+0\ + last_idx 5 first_idx 5\ + parent didn't have regs=10 stack=0 marks\ + last_idx 4 first_idx 2\ + regs=10 stack=0 before 4\ + regs=10 stack=0 before 3\ + regs=0 stack=1 before 2\ + last_idx 5 first_idx 5\ + parent didn't have regs=1 stack=0 marks", + .result = VERBOSE_ACCEPT, + .retval = -1, +}, +{ + "precise: STX insn causing spi > allocated_stack", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), + BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_MOV64_IMM(BPF_REG_0, -1), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = BPF_F_TEST_STATE_FREQ, + .errstr = "last_idx 6 first_idx 6\ + parent didn't have regs=10 stack=0 marks\ + last_idx 5 first_idx 3\ + regs=10 stack=0 before 5\ + regs=10 stack=0 before 4\ + regs=0 stack=1 before 3\ + last_idx 6 first_idx 6\ + parent didn't have regs=1 stack=0 marks\ + last_idx 5 first_idx 3\ + regs=1 stack=0 before 5", + .result = VERBOSE_ACCEPT, + .retval = -1, +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index ebcbf154c460..604b46151736 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -455,7 +455,7 @@ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7), /* bpf_tail_call() */ - BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_3, 3), BPF_LD_MAP_FD(BPF_REG_2, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), @@ -478,7 +478,7 @@ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), BPF_EMIT_CALL(BPF_FUNC_sk_release), /* bpf_tail_call() */ - BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_3, 3), BPF_LD_MAP_FD(BPF_REG_2, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), @@ -497,7 +497,7 @@ BPF_SK_LOOKUP(sk_lookup_tcp), /* bpf_tail_call() */ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), - BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_3, 3), BPF_LD_MAP_FD(BPF_REG_2, 0), BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), diff --git a/tools/testing/selftests/bpf/verifier/runtime_jit.c 
b/tools/testing/selftests/bpf/verifier/runtime_jit.c index a9a8f620e71c..94c399d1faca 100644 --- a/tools/testing/selftests/bpf/verifier/runtime_jit.c +++ b/tools/testing/selftests/bpf/verifier/runtime_jit.c @@ -27,6 +27,19 @@ { "runtime/jit: tail_call within bounds, no prog", .insns = { + BPF_MOV64_IMM(BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 1 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "runtime/jit: tail_call within bounds, key 2", + .insns = { BPF_MOV64_IMM(BPF_REG_3, 2), BPF_LD_MAP_FD(BPF_REG_2, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), @@ -35,9 +48,147 @@ }, .fixup_prog1 = { 1 }, .result = ACCEPT, + .retval = 24, +}, +{ + "runtime/jit: tail_call within bounds, key 2 / key 2, first branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 13), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5, 9 }, + .result = ACCEPT, + .retval = 24, +}, +{ + "runtime/jit: tail_call within bounds, key 2 / key 2, second branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 14), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5, 9 }, + .result = ACCEPT, + .retval = 24, +}, +{ + "runtime/jit: tail_call within bounds, key 0 / key 2, first branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 13), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5, 9 }, + .result = ACCEPT, + .retval = 24, +}, +{ + "runtime/jit: tail_call within bounds, key 0 / key 2, second branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 14), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5, 9 }, + .result = ACCEPT, + .retval = 42, +}, +{ + "runtime/jit: tail_call within bounds, different maps, first branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 13), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 
BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5 }, + .fixup_prog2 = { 9 }, + .result_unpriv = REJECT, + .errstr_unpriv = "tail_call abusing map_ptr", + .result = ACCEPT, .retval = 1, }, { + "runtime/jit: tail_call within bounds, different maps, second branch", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 14), + BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, cb[0])), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, cb[0])), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_prog1 = { 5 }, + .fixup_prog2 = { 9 }, + .result_unpriv = REJECT, + .errstr_unpriv = "tail_call abusing map_ptr", + .result = ACCEPT, + .retval = 42, +}, +{ "runtime/jit: tail_call out of bounds", .insns = { BPF_MOV64_IMM(BPF_REG_3, 256), diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c index d60a343b1371..842d9155d36c 100644 --- a/tools/testing/selftests/bpf/xdping.c +++ b/tools/testing/selftests/bpf/xdping.c @@ -45,7 +45,7 @@ static int get_stats(int fd, __u16 count, __u32 raddr) printf("\nXDP RTT data:\n"); if (bpf_map_lookup_elem(fd, &raddr, &pinginfo)) { - perror("bpf_map_lookup elem: "); + perror("bpf_map_lookup elem"); return 1; } diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c index 58ed5eeab709..ad41ea69001b 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c @@ -109,7 +109,7 @@ static bool set_watchpoint(pid_t pid, int size, int wp) return false; } -static bool arun_test(int wr_size, int wp_size, int wr, int wp) 
+static bool run_test(int wr_size, int wp_size, int wr, int wp) { int status; siginfo_t siginfo; diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 8d369b6a2069..66aafe1f5746 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wall +CFLAGS += -Wall -pthread all: +TEST_FILES := with_stress.sh +TEST_PROGS := test_stress.sh TEST_GEN_PROGS = test_memcontrol TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index bdb69599c4bd..8f7131dcf1ff 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -158,6 +158,22 @@ long cg_read_key_long(const char *cgroup, const char *control, const char *key) return atol(ptr + strlen(key)); } +long cg_read_lc(const char *cgroup, const char *control) +{ + char buf[PAGE_SIZE]; + const char delim[] = "\n"; + char *line; + long cnt = 0; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) + cnt++; + + return cnt; +} + int cg_write(const char *cgroup, const char *control, char *buf) { char path[PATH_MAX]; @@ -282,10 +298,12 @@ int cg_enter(const char *cgroup, int pid) int cg_enter_current(const char *cgroup) { - char pidbuf[64]; + return cg_write(cgroup, "cgroup.procs", "0"); +} - snprintf(pidbuf, sizeof(pidbuf), "%d", getpid()); - return cg_write(cgroup, "cgroup.procs", pidbuf); +int cg_enter_current_thread(const char *cgroup) +{ + return cg_write(cgroup, "cgroup.threads", "0"); } int cg_run(const char *cgroup, @@ -410,11 +428,25 @@ int set_oom_adj_score(int pid, int score) return 0; } -char proc_read_text(int pid, const char *item, char *buf, size_t size) +ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t 
size) { char path[PATH_MAX]; - snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); + if (!pid) + snprintf(path, sizeof(path), "/proc/%s/%s", + thread ? "thread-self" : "self", item); + else + snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); return read_text(path, buf, size); } + +int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) +{ + char buf[PAGE_SIZE]; + + if (proc_read_text(pid, thread, item, buf, sizeof(buf)) < 0) + return -1; + + return strstr(buf, needle) ? 0 : -1; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index c72f28046bfa..49c54fbdb229 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <stdbool.h> #include <stdlib.h> #define PAGE_SIZE 4096 @@ -29,12 +30,14 @@ extern int cg_read_strstr(const char *cgroup, const char *control, const char *needle); extern long cg_read_long(const char *cgroup, const char *control); long cg_read_key_long(const char *cgroup, const char *control, const char *key); +extern long cg_read_lc(const char *cgroup, const char *control); extern int cg_write(const char *cgroup, const char *control, char *buf); extern int cg_run(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); extern int cg_enter(const char *cgroup, int pid); extern int cg_enter_current(const char *cgroup); +extern int cg_enter_current_thread(const char *cgroup); extern int cg_run_nowait(const char *cgroup, int (*fn)(const char *cgroup, void *arg), void *arg); @@ -45,4 +48,5 @@ extern int is_swap_enabled(void); extern int set_oom_adj_score(int pid, int score); extern int cg_wait_for_proc_count(const char *cgroup, int count); extern int cg_killall(const char *cgroup); -extern char proc_read_text(int pid, const char *item, char *buf, size_t size); +extern ssize_t proc_read_text(int pid, bool thread, const char *item, char 
*buf, size_t size); +extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle); diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 79053a4f4783..e19ce940cd6a 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -5,6 +5,9 @@ #include <unistd.h> #include <stdio.h> #include <errno.h> +#include <signal.h> +#include <string.h> +#include <pthread.h> #include "../kselftest.h" #include "cgroup_util.h" @@ -354,6 +357,147 @@ cleanup: return ret; } +static void *dummy_thread_fn(void *arg) +{ + return (void *)(size_t)pause(); +} + +/* + * Test threadgroup migration. + * All threads of a process are migrated together. + */ +static int test_cgcore_proc_migration(const char *root) +{ + int ret = KSFT_FAIL; + int t, c_threads = 0, n_threads = 13; + char *src = NULL, *dst = NULL; + pthread_t threads[n_threads]; + + src = cg_name(root, "cg_src"); + dst = cg_name(root, "cg_dst"); + if (!src || !dst) + goto cleanup; + + if (cg_create(src)) + goto cleanup; + if (cg_create(dst)) + goto cleanup; + + if (cg_enter_current(src)) + goto cleanup; + + for (c_threads = 0; c_threads < n_threads; ++c_threads) { + if (pthread_create(&threads[c_threads], NULL, dummy_thread_fn, NULL)) + goto cleanup; + } + + cg_enter_current(dst); + if (cg_read_lc(dst, "cgroup.threads") != n_threads + 1) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + for (t = 0; t < c_threads; ++t) { + pthread_cancel(threads[t]); + } + + for (t = 0; t < c_threads; ++t) { + pthread_join(threads[t], NULL); + } + + cg_enter_current(root); + + if (dst) + cg_destroy(dst); + if (src) + cg_destroy(src); + free(dst); + free(src); + return ret; +} + +static void *migrating_thread_fn(void *arg) +{ + int g, i, n_iterations = 1000; + char **grps = arg; + char lines[3][PATH_MAX]; + + for (g = 1; g < 3; ++g) + snprintf(lines[g], sizeof(lines[g]), "0::%s", grps[g] + strlen(grps[0])); + + for (i = 0; i < 
n_iterations; ++i) { + cg_enter_current_thread(grps[(i % 2) + 1]); + + if (proc_read_strstr(0, 1, "cgroup", lines[(i % 2) + 1])) + return (void *)-1; + } + return NULL; +} + +/* + * Test single thread migration. + * Threaded cgroups allow successful migration of a thread. + */ +static int test_cgcore_thread_migration(const char *root) +{ + int ret = KSFT_FAIL; + char *dom = NULL; + char line[PATH_MAX]; + char *grps[3] = { (char *)root, NULL, NULL }; + pthread_t thr; + void *retval; + + dom = cg_name(root, "cg_dom"); + grps[1] = cg_name(root, "cg_dom/cg_src"); + grps[2] = cg_name(root, "cg_dom/cg_dst"); + if (!grps[1] || !grps[2] || !dom) + goto cleanup; + + if (cg_create(dom)) + goto cleanup; + if (cg_create(grps[1])) + goto cleanup; + if (cg_create(grps[2])) + goto cleanup; + + if (cg_write(grps[1], "cgroup.type", "threaded")) + goto cleanup; + if (cg_write(grps[2], "cgroup.type", "threaded")) + goto cleanup; + + if (cg_enter_current(grps[1])) + goto cleanup; + + if (pthread_create(&thr, NULL, migrating_thread_fn, grps)) + goto cleanup; + + if (pthread_join(thr, &retval)) + goto cleanup; + + if (retval) + goto cleanup; + + snprintf(line, sizeof(line), "0::%s", grps[1] + strlen(grps[0])); + if (proc_read_strstr(0, 1, "cgroup", line)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_enter_current(root); + if (grps[2]) + cg_destroy(grps[2]); + if (grps[1]) + cg_destroy(grps[1]); + if (dom) + cg_destroy(dom); + free(grps[2]); + free(grps[1]); + free(dom); + return ret; +} + #define T(x) { x, #x } struct corecg_test { int (*fn)(const char *root); @@ -366,6 +510,8 @@ struct corecg_test { T(test_cgcore_parent_becomes_threaded), T(test_cgcore_invalid_domain), T(test_cgcore_populated), + T(test_cgcore_proc_migration), + T(test_cgcore_thread_migration), }; #undef T diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c index 8219a30853d2..23d8fa4a3e4e 100644 --- a/tools/testing/selftests/cgroup/test_freezer.c +++ 
b/tools/testing/selftests/cgroup/test_freezer.c @@ -72,6 +72,7 @@ static int cg_prepare_for_wait(const char *cgroup) if (ret == -1) { debug("Error: inotify_add_watch() failed\n"); close(fd); + fd = -1; } return fd; @@ -448,6 +449,59 @@ cleanup: } /* + * The test creates a cgroups and freezes it. Then it creates a child cgroup + * and populates it with a task. After that it checks that the child cgroup + * is frozen and the parent cgroup remains frozen too. + */ +static int test_cgfreezer_mkdir(const char *root) +{ + int ret = KSFT_FAIL; + char *parent, *child = NULL; + int pid; + + parent = cg_name(root, "cg_test_mkdir_A"); + if (!parent) + goto cleanup; + + child = cg_name(parent, "cg_test_mkdir_B"); + if (!child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_freeze_wait(parent, true)) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + pid = cg_run_nowait(child, child_fn, NULL); + if (pid < 0) + goto cleanup; + + if (cg_wait_for_proc_count(child, 1)) + goto cleanup; + + if (cg_check_frozen(child, true)) + goto cleanup; + + if (cg_check_frozen(parent, true)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (child) + cg_destroy(child); + free(child); + if (parent) + cg_destroy(parent); + free(parent); + return ret; +} + +/* * The test creates two nested cgroups, freezes the parent * and removes the child. 
Then it checks that the parent cgroup * remains frozen and it's possible to create a new child @@ -648,7 +702,7 @@ static int proc_check_stopped(int pid) char buf[PAGE_SIZE]; int len; - len = proc_read_text(pid, "stat", buf, sizeof(buf)); + len = proc_read_text(pid, 0, "stat", buf, sizeof(buf)); if (len == -1) { debug("Can't get %d stat\n", pid); return -1; @@ -815,6 +869,7 @@ struct cgfreezer_test { T(test_cgfreezer_simple), T(test_cgfreezer_tree), T(test_cgfreezer_forkbomb), + T(test_cgfreezer_mkdir), T(test_cgfreezer_rmdir), T(test_cgfreezer_migrate), T(test_cgfreezer_ptrace), diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh new file mode 100755 index 000000000000..15d9d5896394 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_stress.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +./with_stress.sh -s subsys -s fork ./test_core diff --git a/tools/testing/selftests/cgroup/with_stress.sh b/tools/testing/selftests/cgroup/with_stress.sh new file mode 100755 index 000000000000..e28c35008f5b --- /dev/null +++ b/tools/testing/selftests/cgroup/with_stress.sh @@ -0,0 +1,101 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +stress_fork() +{ + while true ; do + /usr/bin/true + sleep 0.01 + done +} + +stress_subsys() +{ + local verb=+ + while true ; do + echo $verb$subsys_ctrl >$sysfs/cgroup.subtree_control + [ $verb = "+" ] && verb=- || verb=+ + # incommensurable period with other stresses + sleep 0.011 + done +} + +init_and_check() +{ + sysfs=`mount -t cgroup2 | head -1 | awk '{ print $3 }'` + if [ ! -d "$sysfs" ]; then + echo "Skipping: cgroup2 is not mounted" >&2 + exit $ksft_skip + fi + + if ! echo +$subsys_ctrl >$sysfs/cgroup.subtree_control ; then + echo "Skipping: cannot enable $subsys_ctrl in $sysfs" >&2 + exit $ksft_skip + fi + + if ! 
echo -$subsys_ctrl >$sysfs/cgroup.subtree_control ; then + echo "Skipping: cannot disable $subsys_ctrl in $sysfs" >&2 + exit $ksft_skip + fi +} + +declare -a stresses +declare -a stress_pids +duration=5 +rc=0 +subsys_ctrl=cpuset +sysfs= + +while getopts c:d:hs: opt; do + case $opt in + c) + subsys_ctrl=$OPTARG + ;; + d) + duration=$OPTARG + ;; + h) + echo "Usage $0 [ -s stress ] ... [ -d duration ] [-c controller] cmd args .." + echo -e "\t default duration $duration seconds" + echo -e "\t default controller $subsys_ctrl" + exit + ;; + s) + func=stress_$OPTARG + if [ "x$(type -t $func)" != "xfunction" ] ; then + echo "Unknown stress $OPTARG" + exit 1 + fi + stresses+=($func) + ;; + esac +done +shift $((OPTIND - 1)) + +init_and_check + +for s in ${stresses[*]} ; do + $s & + stress_pids+=($!) +done + + +time=0 +start=$(date +%s) + +while [ $time -lt $duration ] ; do + $* + rc=$? + [ $rc -eq 0 ] || break + time=$(($(date +%s) - $start)) +done + +for pid in ${stress_pids[*]} ; do + kill -SIGTERM $pid + wait $pid +done + +exit $rc diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore new file mode 100644 index 000000000000..0dc4f32c6cb8 --- /dev/null +++ b/tools/testing/selftests/clone3/.gitignore @@ -0,0 +1,3 @@ +clone3 +clone3_clear_sighand +clone3_set_tid diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile new file mode 100644 index 000000000000..cf976c732906 --- /dev/null +++ b/tools/testing/selftests/clone3/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -g -I../../../../usr/include/ + +TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid + +include ../lib.mk diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c new file mode 100644 index 000000000000..f14c269a5a18 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Based on Christian 
Brauner's clone3() example */ + +#define _GNU_SOURCE +#include <errno.h> +#include <inttypes.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sched.h> + +#include "../kselftest.h" +#include "clone3_selftests.h" + +/* + * Different sizes of struct clone_args + */ +#ifndef CLONE3_ARGS_SIZE_V0 +#define CLONE3_ARGS_SIZE_V0 64 +#endif + +enum test_mode { + CLONE3_ARGS_NO_TEST, + CLONE3_ARGS_ALL_0, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG, + CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG, +}; + +static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) +{ + struct clone_args args = { + .flags = flags, + .exit_signal = SIGCHLD, + }; + + struct clone_args_extended { + struct clone_args args; + __aligned_u64 excess_space[2]; + } args_ext; + + pid_t pid = -1; + int status; + + memset(&args_ext, 0, sizeof(args_ext)); + if (size > sizeof(struct clone_args)) + args_ext.excess_space[1] = 1; + + if (size == 0) + size = sizeof(struct clone_args); + + switch (test_mode) { + case CLONE3_ARGS_ALL_0: + args.flags = 0; + args.exit_signal = 0; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG: + args.exit_signal = 0xbadc0ded00000000ULL; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG: + args.exit_signal = 0x0000000080000000ULL; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG: + args.exit_signal = 0x0000000000000100ULL; + break; + case CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG: + args.exit_signal = 0x00000000000000f0ULL; + break; + } + + memcpy(&args_ext.args, &args, sizeof(struct clone_args)); + + pid = sys_clone3((struct clone_args *)&args_ext, size); + if (pid < 0) { + ksft_print_msg("%s - Failed to create new process\n", + strerror(errno)); + return -errno; + } + + if (pid == 0) { + ksft_print_msg("I am the child, my 
PID is %d\n", getpid()); + _exit(EXIT_SUCCESS); + } + + ksft_print_msg("I am the parent (%d). My child's pid is %d\n", + getpid(), pid); + + if (waitpid(-1, &status, __WALL) < 0) { + ksft_print_msg("Child returned %s\n", strerror(errno)); + return -errno; + } + if (WEXITSTATUS(status)) + return WEXITSTATUS(status); + + return 0; +} + +static void test_clone3(uint64_t flags, size_t size, int expected, + enum test_mode test_mode) +{ + int ret; + + ksft_print_msg( + "[%d] Trying clone3() with flags %#" PRIx64 " (size %zu)\n", + getpid(), flags, size); + ret = call_clone3(flags, size, test_mode); + ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n", + getpid(), ret, expected); + if (ret != expected) + ksft_test_result_fail( + "[%d] Result (%d) is different than expected (%d)\n", + getpid(), ret, expected); + else + ksft_test_result_pass( + "[%d] Result (%d) matches expectation (%d)\n", + getpid(), ret, expected); +} + +int main(int argc, char *argv[]) +{ + pid_t pid; + + uid_t uid = getuid(); + + test_clone3_supported(); + ksft_print_header(); + ksft_set_plan(17); + + /* Just a simple clone3() should return 0.*/ + test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() in a new PID NS.*/ + if (uid == 0) + test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST); + else + ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0. 
*/ + test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */ + test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with sizeof(struct clone_args) + 8 */ + test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with exit_signal having highest 32 bits non-zero */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); + + /* Do a clone3() with negative 32-bit exit_signal */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG); + + /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG); + + /* Do a clone3() with NSIG < exit_signal < CSIG */ + test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); + + test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + + test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG, + CLONE3_ARGS_ALL_0); + + test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG, + CLONE3_ARGS_ALL_0); + + /* Do a clone3() with > page size */ + test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */ + if (uid == 0) + test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0, + CLONE3_ARGS_NO_TEST); + else + ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + + /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */ + test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, + CLONE3_ARGS_NO_TEST); + + /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ + if (uid == 0) + test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0, + CLONE3_ARGS_NO_TEST); + else + ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); + + /* Do a clone3() with > page size in a new PID NS */ + test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG, + CLONE3_ARGS_NO_TEST); + + return !ksft_get_fail_cnt() ? 
ksft_exit_pass() : ksft_exit_fail(); +} diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c new file mode 100644 index 000000000000..9e1af8aa7698 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <sys/syscall.h> +#include <sys/wait.h> + +#include "../kselftest.h" +#include "clone3_selftests.h" + +#ifndef CLONE_CLEAR_SIGHAND +#define CLONE_CLEAR_SIGHAND 0x100000000ULL +#endif + +static void nop_handler(int signo) +{ +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void test_clone3_clear_sighand(void) +{ + int ret; + pid_t pid; + struct clone_args args = {}; + struct sigaction act; + + /* + * Check that CLONE_CLEAR_SIGHAND and CLONE_SIGHAND are mutually + * exclusive. 
+ */ + args.flags |= CLONE_CLEAR_SIGHAND | CLONE_SIGHAND; + args.exit_signal = SIGCHLD; + pid = sys_clone3(&args, sizeof(args)); + if (pid > 0) + ksft_exit_fail_msg( + "clone3(CLONE_CLEAR_SIGHAND | CLONE_SIGHAND) succeeded\n"); + + act.sa_handler = nop_handler; + ret = sigemptyset(&act.sa_mask); + if (ret < 0) + ksft_exit_fail_msg("%s - sigemptyset() failed\n", + strerror(errno)); + + act.sa_flags = 0; + + /* Register signal handler for SIGUSR1 */ + ret = sigaction(SIGUSR1, &act, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s - sigaction(SIGUSR1, &act, NULL) failed\n", + strerror(errno)); + + /* Register signal handler for SIGUSR2 */ + ret = sigaction(SIGUSR2, &act, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s - sigaction(SIGUSR2, &act, NULL) failed\n", + strerror(errno)); + + /* Check that CLONE_CLEAR_SIGHAND works. */ + args.flags = CLONE_CLEAR_SIGHAND; + pid = sys_clone3(&args, sizeof(args)); + if (pid < 0) + ksft_exit_fail_msg("%s - clone3(CLONE_CLEAR_SIGHAND) failed\n", + strerror(errno)); + + if (pid == 0) { + ret = sigaction(SIGUSR1, NULL, &act); + if (ret < 0) + exit(EXIT_FAILURE); + + if (act.sa_handler != SIG_DFL) + exit(EXIT_FAILURE); + + ret = sigaction(SIGUSR2, NULL, &act); + if (ret < 0) + exit(EXIT_FAILURE); + + if (act.sa_handler != SIG_DFL) + exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + ret = wait_for_pid(pid); + if (ret) + ksft_exit_fail_msg( + "Failed to clear signal handler for child process\n"); + + ksft_test_result_pass("Cleared signal handlers for child process\n"); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + test_clone3_supported(); + + ksft_set_plan(1); + + test_clone3_clear_sighand(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h new file mode 100644 index 000000000000..a3f2c8ad8bcc --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 
*/ + +#ifndef _CLONE3_SELFTESTS_H +#define _CLONE3_SELFTESTS_H + +#define _GNU_SOURCE +#include <sched.h> +#include <stdint.h> +#include <syscall.h> +#include <linux/types.h> + +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) + +#ifndef __NR_clone3 +#define __NR_clone3 -1 +struct clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; +}; +#endif + +static pid_t sys_clone3(struct clone_args *args, size_t size) +{ + fflush(stdout); + fflush(stderr); + return syscall(__NR_clone3, args, size); +} + +static inline void test_clone3_supported(void) +{ + pid_t pid; + struct clone_args args = {}; + + if (__NR_clone3 < 0) + ksft_exit_skip("clone3() syscall is not supported\n"); + + /* Set to something that will always cause EINVAL. */ + args.exit_signal = -1; + pid = sys_clone3(&args, sizeof(args)); + if (!pid) + exit(EXIT_SUCCESS); + + if (pid > 0) { + wait(NULL); + ksft_exit_fail_msg( + "Managed to create child process with invalid exit_signal\n"); + } + + if (errno == ENOSYS) + ksft_exit_skip("clone3() syscall is not supported\n"); + + ksft_print_msg("clone3() syscall supported\n"); +} + +#endif /* _CLONE3_SELFTESTS_H */ diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c new file mode 100644 index 000000000000..25beb22f35b5 --- /dev/null +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -0,0 +1,397 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Based on Christian Brauner's clone3() example. + * These tests are assuming to be running in the host's + * PID namespace. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sched.h> + +#include "../kselftest.h" +#include "clone3_selftests.h" + +#ifndef MAX_PID_NS_LEVEL +#define MAX_PID_NS_LEVEL 32 +#endif + +static int pipe_1[2]; +static int pipe_2[2]; + +static void child_exit(int ret) +{ + fflush(stdout); + fflush(stderr); + _exit(ret); +} + +static int call_clone3_set_tid(pid_t *set_tid, + size_t set_tid_size, + int flags, + int expected_pid, + bool wait_for_it) +{ + int status; + pid_t pid = -1; + + struct clone_args args = { + .flags = flags, + .exit_signal = SIGCHLD, + .set_tid = ptr_to_u64(set_tid), + .set_tid_size = set_tid_size, + }; + + pid = sys_clone3(&args, sizeof(struct clone_args)); + if (pid < 0) { + ksft_print_msg("%s - Failed to create new process\n", + strerror(errno)); + return -errno; + } + + if (pid == 0) { + int ret; + char tmp = 0; + int exit_code = EXIT_SUCCESS; + + ksft_print_msg("I am the child, my PID is %d (expected %d)\n", + getpid(), set_tid[0]); + if (wait_for_it) { + ksft_print_msg("[%d] Child is ready and waiting\n", + getpid()); + + /* Signal the parent that the child is ready */ + close(pipe_1[0]); + ret = write(pipe_1[1], &tmp, 1); + if (ret != 1) { + ksft_print_msg( + "Writing to pipe returned %d", ret); + exit_code = EXIT_FAILURE; + } + close(pipe_1[1]); + close(pipe_2[1]); + ret = read(pipe_2[0], &tmp, 1); + if (ret != 1) { + ksft_print_msg( + "Reading from pipe returned %d", ret); + exit_code = EXIT_FAILURE; + } + close(pipe_2[0]); + } + + if (set_tid[0] != getpid()) + child_exit(EXIT_FAILURE); + child_exit(exit_code); + } + + if (expected_pid == 0 || expected_pid == pid) { + ksft_print_msg("I am the parent (%d). 
My child's pid is %d\n", + getpid(), pid); + } else { + ksft_print_msg( + "Expected child pid %d does not match actual pid %d\n", + expected_pid, pid); + return -1; + } + + if (waitpid(pid, &status, 0) < 0) { + ksft_print_msg("Child returned %s\n", strerror(errno)); + return -errno; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void test_clone3_set_tid(pid_t *set_tid, + size_t set_tid_size, + int flags, + int expected, + int expected_pid, + bool wait_for_it) +{ + int ret; + + ksft_print_msg( + "[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n", + getpid(), set_tid[0], flags); + ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid, + wait_for_it); + ksft_print_msg( + "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n", + getpid(), set_tid[0], ret, expected); + if (ret != expected) + ksft_test_result_fail( + "[%d] Result (%d) is different than expected (%d)\n", + getpid(), ret, expected); + else + ksft_test_result_pass( + "[%d] Result (%d) matches expectation (%d)\n", + getpid(), ret, expected); +} +int main(int argc, char *argv[]) +{ + FILE *f; + char buf; + char *line; + int status; + int ret = -1; + size_t len = 0; + int pid_max = 0; + uid_t uid = getuid(); + char proc_path[100] = {0}; + pid_t pid, ns1, ns2, ns3, ns_pid; + pid_t set_tid[MAX_PID_NS_LEVEL * 2]; + + ksft_print_header(); + test_clone3_supported(); + ksft_set_plan(29); + + if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0) + ksft_exit_fail_msg("pipe() failed\n"); + + f = fopen("/proc/sys/kernel/pid_max", "r"); + if (f == NULL) + ksft_exit_fail_msg( + "%s - Could not open /proc/sys/kernel/pid_max\n", + strerror(errno)); + fscanf(f, "%d", &pid_max); + fclose(f); + ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max); + + /* Try invalid settings */ + memset(&set_tid, 0, sizeof(set_tid)); + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0); + + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0); + 
+ test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, + -EINVAL, 0, 0); + + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0); + + /* + * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1 + * nested PID namespace. + */ + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0); + + memset(&set_tid, 0xff, sizeof(set_tid)); + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0); + + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0); + + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0, + -EINVAL, 0, 0); + + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0); + + /* + * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1 + * nested PID namespace. + */ + test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0); + + memset(&set_tid, 0, sizeof(set_tid)); + /* Try with an invalid PID */ + set_tid[0] = 0; + test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + + set_tid[0] = -1; + test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + + /* Claim that the set_tid array actually contains 2 elements. */ + test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0); + + /* Try it in a new PID namespace */ + if (uid == 0) + test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); + else + ksft_test_result_skip("Clone3() with set_tid requires root\n"); + + /* Try with a valid PID (1) this should return -EEXIST. 
*/ + set_tid[0] = 1; + if (uid == 0) + test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0); + else + ksft_test_result_skip("Clone3() with set_tid requires root\n"); + + /* Try it in a new PID namespace */ + if (uid == 0) + test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0); + else + ksft_test_result_skip("Clone3() with set_tid requires root\n"); + + /* pid_max should fail everywhere */ + set_tid[0] = pid_max; + test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + + if (uid == 0) + test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); + else + ksft_test_result_skip("Clone3() with set_tid requires root\n"); + + if (uid != 0) { + /* + * All remaining tests require root. Tell the framework + * that all those tests are skipped as non-root. + */ + ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num(); + goto out; + } + + /* Find the current active PID */ + pid = fork(); + if (pid == 0) { + ksft_print_msg("Child has PID %d\n", getpid()); + child_exit(EXIT_SUCCESS); + } + if (waitpid(pid, &status, 0) < 0) + ksft_exit_fail_msg("Waiting for child %d failed", pid); + + /* After the child has finished, its PID should be free. */ + set_tid[0] = pid; + test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0); + + /* This should fail as there is no PID 1 in that namespace */ + test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0); + + /* + * Creating a process with PID 1 in the newly created most nested + * PID namespace and PID 'pid' in the parent PID namespace. This + * needs to work. 
+ */ + set_tid[0] = 1; + set_tid[1] = pid; + test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0); + + ksft_print_msg("unshare PID namespace\n"); + if (unshare(CLONE_NEWPID) == -1) + ksft_exit_fail_msg("unshare(CLONE_NEWPID) failed: %s\n", + strerror(errno)); + + set_tid[0] = pid; + + /* This should fail as there is no PID 1 in that namespace */ + test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0); + + /* Let's create a PID 1 */ + ns_pid = fork(); + if (ns_pid == 0) { + /* + * This and the next test cases check that all pid-s are + * released on error paths. + */ + set_tid[0] = 43; + set_tid[1] = -1; + test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0); + + set_tid[0] = 43; + set_tid[1] = pid; + test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0); + + ksft_print_msg("Child in PID namespace has PID %d\n", getpid()); + set_tid[0] = 2; + test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0); + + set_tid[0] = 1; + set_tid[1] = -1; + set_tid[2] = pid; + /* This should fail as there is invalid PID at level '1'. */ + test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0); + + set_tid[0] = 1; + set_tid[1] = 42; + set_tid[2] = pid; + /* + * This should fail as there are not enough active PID + * namespaces. Again assuming this is running in the host's + * PID namespace. Not yet nested. + */ + test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0); + + /* + * This should work and from the parent we should see + * something like 'NSpid: pid 42 1'. 
+ */ + test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true); + + child_exit(ksft_cnt.ksft_fail); + } + + close(pipe_1[1]); + close(pipe_2[0]); + while (read(pipe_1[0], &buf, 1) > 0) { + ksft_print_msg("[%d] Child is ready and waiting\n", getpid()); + break; + } + + snprintf(proc_path, sizeof(proc_path), "/proc/%d/status", pid); + f = fopen(proc_path, "r"); + if (f == NULL) + ksft_exit_fail_msg( + "%s - Could not open %s\n", + strerror(errno), proc_path); + + while (getline(&line, &len, f) != -1) { + if (strstr(line, "NSpid")) { + int i; + + /* Verify that all generated PIDs are as expected. */ + i = sscanf(line, "NSpid:\t%d\t%d\t%d", + &ns3, &ns2, &ns1); + if (i != 3) { + ksft_print_msg( + "Unexpected 'NSPid:' entry: %s", + line); + ns1 = ns2 = ns3 = 0; + } + break; + } + } + fclose(f); + free(line); + close(pipe_2[0]); + + /* Tell the clone3()'d child to finish. */ + write(pipe_2[1], &buf, 1); + close(pipe_2[1]); + + if (waitpid(ns_pid, &status, 0) < 0) { + ksft_print_msg("Child returned %s\n", strerror(errno)); + ret = -errno; + goto out; + } + + if (!WIFEXITED(status)) + ksft_test_result_fail("Child error\n"); + + ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status)); + ksft_cnt.ksft_fail = WEXITSTATUS(status); + + if (ns3 == pid && ns2 == 42 && ns1 == 1) + ksft_test_result_pass( + "PIDs in all namespaces as expected (%d,%d,%d)\n", + ns3, ns2, ns1); + else + ksft_test_result_fail( + "PIDs in all namespaces not as expected (%d,%d,%d)\n", + ns3, ns2, ns1); +out: + ret = 0; + + return !ret ? 
ksft_exit_pass() : ksft_exit_fail(); +} diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile new file mode 100644 index 000000000000..607c2acd2082 --- /dev/null +++ b/tools/testing/selftests/dmabuf-heaps/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include + +TEST_GEN_PROGS = dmabuf-heap + +include ../lib.mk diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c new file mode 100644 index 000000000000..cd5e1f602ac9 --- /dev/null +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/types.h> + +#include <linux/dma-buf.h> +#include <drm/drm.h> + +#include "../../../../include/uapi/linux/dma-heap.h" + +#define DEVPATH "/dev/dma_heap" + +static int check_vgem(int fd) +{ + drm_version_t version = { 0 }; + char name[5]; + int ret; + + version.name_len = 4; + version.name = name; + + ret = ioctl(fd, DRM_IOCTL_VERSION, &version); + if (ret) + return 0; + + return !strcmp(name, "vgem"); +} + +static int open_vgem(void) +{ + int i, fd; + const char *drmstr = "/dev/dri/card"; + + fd = -1; + for (i = 0; i < 16; i++) { + char name[80]; + + snprintf(name, 80, "%s%u", drmstr, i); + + fd = open(name, O_RDWR); + if (fd < 0) + continue; + + if (!check_vgem(fd)) { + close(fd); + fd = -1; + continue; + } else { + break; + } + } + return fd; +} + +static int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle) +{ + struct drm_prime_handle import_handle = { + .fd = dma_buf_fd, + .flags = 0, + .handle = 0, + }; + int ret; + + ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle); + if (ret 
== 0) + *handle = import_handle.handle; + return ret; +} + +static void close_handle(int vgem_fd, uint32_t handle) +{ + struct drm_gem_close close = { + .handle = handle, + }; + + ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +static int dmabuf_heap_open(char *name) +{ + int ret, fd; + char buf[256]; + + ret = snprintf(buf, 256, "%s/%s", DEVPATH, name); + if (ret < 0) { + printf("snprintf failed!\n"); + return ret; + } + + fd = open(buf, O_RDWR); + if (fd < 0) + printf("open %s failed!\n", buf); + return fd; +} + +static int dmabuf_heap_alloc_fdflags(int fd, size_t len, unsigned int fd_flags, + unsigned int heap_flags, int *dmabuf_fd) +{ + struct dma_heap_allocation_data data = { + .len = len, + .fd = 0, + .fd_flags = fd_flags, + .heap_flags = heap_flags, + }; + int ret; + + if (!dmabuf_fd) + return -EINVAL; + + ret = ioctl(fd, DMA_HEAP_IOCTL_ALLOC, &data); + if (ret < 0) + return ret; + *dmabuf_fd = (int)data.fd; + return ret; +} + +static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags, + int *dmabuf_fd) +{ + return dmabuf_heap_alloc_fdflags(fd, len, O_RDWR | O_CLOEXEC, flags, + dmabuf_fd); +} + +static void dmabuf_sync(int fd, int start_stop) +{ + struct dma_buf_sync sync = { + .flags = start_stop | DMA_BUF_SYNC_RW, + }; + int ret; + + ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); + if (ret) + printf("sync failed %d\n", errno); +} + +#define ONE_MEG (1024 * 1024) + +static int test_alloc_and_import(char *heap_name) +{ + int heap_fd = -1, dmabuf_fd = -1, importer_fd = -1; + uint32_t handle = 0; + void *p = NULL; + int ret; + + printf("Testing heap: %s\n", heap_name); + + heap_fd = dmabuf_heap_open(heap_name); + if (heap_fd < 0) + return -1; + + printf("Allocating 1 MEG\n"); + ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd); + if (ret) { + printf("Allocation Failed!\n"); + ret = -1; + goto out; + } + /* mmap and write a simple pattern */ + p = mmap(NULL, + ONE_MEG, + PROT_READ | PROT_WRITE, + MAP_SHARED, + dmabuf_fd, + 0); + if (p == 
MAP_FAILED) { + printf("mmap() failed: %m\n"); + ret = -1; + goto out; + } + printf("mmap passed\n"); + + dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); + memset(p, 1, ONE_MEG / 2); + memset((char *)p + ONE_MEG / 2, 0, ONE_MEG / 2); + dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); + + importer_fd = open_vgem(); + if (importer_fd < 0) { + ret = importer_fd; + printf("Failed to open vgem\n"); + goto out; + } + + ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle); + if (ret < 0) { + printf("Failed to import buffer\n"); + goto out; + } + printf("import passed\n"); + + dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START); + memset(p, 0xff, ONE_MEG); + dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END); + printf("syncs passed\n"); + + close_handle(importer_fd, handle); + ret = 0; + +out: + if (p) + munmap(p, ONE_MEG); + if (importer_fd >= 0) + close(importer_fd); + if (dmabuf_fd >= 0) + close(dmabuf_fd); + if (heap_fd >= 0) + close(heap_fd); + + return ret; +} + +/* Test the ioctl version compatibility w/ a smaller structure then expected */ +static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags, + int *dmabuf_fd) +{ + int ret; + unsigned int older_alloc_ioctl; + struct dma_heap_allocation_data_smaller { + __u64 len; + __u32 fd; + __u32 fd_flags; + } data = { + .len = len, + .fd = 0, + .fd_flags = O_RDWR | O_CLOEXEC, + }; + + older_alloc_ioctl = _IOWR(DMA_HEAP_IOC_MAGIC, 0x0, + struct dma_heap_allocation_data_smaller); + if (!dmabuf_fd) + return -EINVAL; + + ret = ioctl(fd, older_alloc_ioctl, &data); + if (ret < 0) + return ret; + *dmabuf_fd = (int)data.fd; + return ret; +} + +/* Test the ioctl version compatibility w/ a larger structure then expected */ +static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags, + int *dmabuf_fd) +{ + int ret; + unsigned int newer_alloc_ioctl; + struct dma_heap_allocation_data_bigger { + __u64 len; + __u32 fd; + __u32 fd_flags; + __u64 heap_flags; + __u64 garbage1; + __u64 garbage2; + __u64 garbage3; + } data = { + .len = 
len, + .fd = 0, + .fd_flags = O_RDWR | O_CLOEXEC, + .heap_flags = flags, + .garbage1 = 0xffffffff, + .garbage2 = 0x88888888, + .garbage3 = 0x11111111, + }; + + newer_alloc_ioctl = _IOWR(DMA_HEAP_IOC_MAGIC, 0x0, + struct dma_heap_allocation_data_bigger); + if (!dmabuf_fd) + return -EINVAL; + + ret = ioctl(fd, newer_alloc_ioctl, &data); + if (ret < 0) + return ret; + + *dmabuf_fd = (int)data.fd; + return ret; +} + +static int test_alloc_compat(char *heap_name) +{ + int heap_fd = -1, dmabuf_fd = -1; + int ret; + + heap_fd = dmabuf_heap_open(heap_name); + if (heap_fd < 0) + return -1; + + printf("Testing (theoretical)older alloc compat\n"); + ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd); + if (ret) { + printf("Older compat allocation failed!\n"); + ret = -1; + goto out; + } + close(dmabuf_fd); + + printf("Testing (theoretical)newer alloc compat\n"); + ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd); + if (ret) { + printf("Newer compat allocation failed!\n"); + ret = -1; + goto out; + } + printf("Ioctl compatibility tests passed\n"); +out: + if (dmabuf_fd >= 0) + close(dmabuf_fd); + if (heap_fd >= 0) + close(heap_fd); + + return ret; +} + +static int test_alloc_errors(char *heap_name) +{ + int heap_fd = -1, dmabuf_fd = -1; + int ret; + + heap_fd = dmabuf_heap_open(heap_name); + if (heap_fd < 0) + return -1; + + printf("Testing expected error cases\n"); + ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd); + if (!ret) { + printf("Did not see expected error (invalid fd)!\n"); + ret = -1; + goto out; + } + + ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd); + if (!ret) { + printf("Did not see expected error (invalid heap flags)!\n"); + ret = -1; + goto out; + } + + ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG, + ~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd); + if (!ret) { + printf("Did not see expected error (invalid fd flags)!\n"); + ret = -1; + goto out; + } + + printf("Expected error checking passed\n"); +out: + if 
(dmabuf_fd >= 0) + close(dmabuf_fd); + if (heap_fd >= 0) + close(heap_fd); + + return ret; +} + +int main(void) +{ + DIR *d; + struct dirent *dir; + int ret = -1; + + d = opendir(DEVPATH); + if (!d) { + printf("No %s directory?\n", DEVPATH); + return -1; + } + + while ((dir = readdir(d)) != NULL) { + if (!strncmp(dir->d_name, ".", 2)) + continue; + if (!strncmp(dir->d_name, "..", 3)) + continue; + + ret = test_alloc_and_import(dir->d_name); + if (ret) + break; + + ret = test_alloc_compat(dir->d_name); + if (ret) + break; + + ret = test_alloc_errors(dir->d_name); + if (ret) + break; + } + closedir(d); + + return ret; +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh new file mode 100755 index 000000000000..89b55e946eed --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh @@ -0,0 +1,129 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test generic devlink-trap functionality over mlxsw. These tests are not +# specific to a single trap, but do not check the devlink-trap common +# infrastructure either. 
+ +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + dev_del_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +dev_del_test() +{ + local trap_name="source_mac_is_multicast" + local smac=01:02:03:04:05:06 + local num_iter=5 + local mz_pid + local i + + $MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -q & + mz_pid=$! + + # The purpose of this test is to make sure we correctly dismantle a + # port while packets are trapped from it. This is done by reloading the + # the driver while the 'ingress_smac_mc_drop' trap is triggered. 
+ RET=0 + + for i in $(seq 1 $num_iter); do + log_info "Iteration $i / $num_iter" + + devlink_trap_action_set $trap_name "trap" + sleep 1 + + devlink_reload + # Allow netdevices to be re-created following the reload + sleep 20 + + cleanup + setup_prepare + setup_wait + done + + log_test "Device delete" + + kill $mz_pid && wait $mz_pid &> /dev/null +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh new file mode 100755 index 000000000000..58cdbfb608e9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh @@ -0,0 +1,437 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap L2 drops functionality over mlxsw. Each registered L2 drop +# packet trap is tested to make sure it is triggered under the right +# conditions. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + source_mac_is_multicast_test + vlan_tag_mismatch_test + ingress_vlan_filter_test + ingress_stp_filter_test + port_list_is_empty_test + port_loopback_filter_test +" +NUM_NETIFS=4 +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 +} + +h2_destroy() +{ + simple_if_fini $h2 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + 
vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +source_mac_is_multicast_test() +{ + local trap_name="source_mac_is_multicast" + local smac=01:02:03:04:05:06 + local group_name="l2_drops" + local mz_pid + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower src_mac $smac action drop + + $MZ $h1 -c 0 -p 100 -a $smac -b bcast -t ip -d 1msec -q & + mz_pid=$! + + RET=0 + + devlink_trap_drop_test $trap_name $group_name $swp2 + + log_test "Source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip +} + +__vlan_tag_mismatch_test() +{ + local trap_name="vlan_tag_mismatch" + local dmac=de:ad:be:ef:13:37 + local group_name="l2_drops" + local opt=$1; shift + local mz_pid + + # Remove PVID flag. This should prevent untagged and prio-tagged + # packets from entering the bridge. + bridge vlan add vid 1 dev $swp1 untagged master + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp2 + + # Add PVID and make sure packets are no longer dropped. + bridge vlan add vid 1 dev $swp1 pvid untagged master + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? 
"Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip +} + +vlan_tag_mismatch_untagged_test() +{ + RET=0 + + __vlan_tag_mismatch_test + + log_test "VLAN tag mismatch - untagged packets" +} + +vlan_tag_mismatch_vid_0_test() +{ + RET=0 + + __vlan_tag_mismatch_test "-Q 0" + + log_test "VLAN tag mismatch - prio-tagged packets" +} + +vlan_tag_mismatch_test() +{ + vlan_tag_mismatch_untagged_test + vlan_tag_mismatch_vid_0_test +} + +ingress_vlan_filter_test() +{ + local trap_name="ingress_vlan_filter" + local dmac=de:ad:be:ef:13:37 + local group_name="l2_drops" + local mz_pid + local vid=10 + + bridge vlan add vid $vid dev $swp2 master + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp2 + + # Add the VLAN on the bridge port and make sure packets are no longer + # dropped. + bridge vlan add vid $vid dev $swp1 master + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? 
"Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Ingress VLAN filter" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip + + bridge vlan del vid $vid dev $swp1 master + bridge vlan del vid $vid dev $swp2 master +} + +__ingress_stp_filter_test() +{ + local trap_name="ingress_spanning_tree_filter" + local dmac=de:ad:be:ef:13:37 + local group_name="l2_drops" + local state=$1; shift + local mz_pid + local vid=20 + + bridge vlan add vid $vid dev $swp2 master + bridge vlan add vid $vid dev $swp1 master + ip link set dev $swp1 type bridge_slave state $state + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp2 + + # Change STP state to forwarding and make sure packets are no longer + # dropped. + ip link set dev $swp1 type bridge_slave state 3 + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? 
"Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip + + bridge vlan del vid $vid dev $swp1 master + bridge vlan del vid $vid dev $swp2 master +} + +ingress_stp_filter_listening_test() +{ + local state=$1; shift + + RET=0 + + __ingress_stp_filter_test $state + + log_test "Ingress STP filter - listening state" +} + +ingress_stp_filter_learning_test() +{ + local state=$1; shift + + RET=0 + + __ingress_stp_filter_test $state + + log_test "Ingress STP filter - learning state" +} + +ingress_stp_filter_test() +{ + ingress_stp_filter_listening_test 1 + ingress_stp_filter_learning_test 2 +} + +port_list_is_empty_uc_test() +{ + local trap_name="port_list_is_empty" + local dmac=de:ad:be:ef:13:37 + local group_name="l2_drops" + local mz_pid + + # Disable unicast flooding on both ports, so that packets cannot egress + # any port. + ip link set dev $swp1 type bridge_slave flood off + ip link set dev $swp2 type bridge_slave flood off + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp2 + + # Allow packets to be flooded to one port. + ip link set dev $swp2 type bridge_slave flood on + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? 
"Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Port list is empty - unicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip + + ip link set dev $swp1 type bridge_slave flood on +} + +port_list_is_empty_mc_test() +{ + local trap_name="port_list_is_empty" + local dmac=01:00:5e:00:00:01 + local group_name="l2_drops" + local dip=239.0.0.1 + local mz_pid + + # Disable multicast flooding on both ports, so that packets cannot + # egress any port. We also need to flush IP addresses from the bridge + # in order to prevent packets from being flooded to the router port. + ip link set dev $swp1 type bridge_slave mcast_flood off + ip link set dev $swp2 type bridge_slave mcast_flood off + ip address flush dev br0 + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp2 + + # Allow packets to be flooded to one port. + ip link set dev $swp2 type bridge_slave mcast_flood on + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? 
"Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Port list is empty - multicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip + + ip link set dev $swp1 type bridge_slave mcast_flood on +} + +port_list_is_empty_test() +{ + port_list_is_empty_uc_test + port_list_is_empty_mc_test +} + +port_loopback_filter_uc_test() +{ + local trap_name="port_loopback_filter" + local dmac=de:ad:be:ef:13:37 + local group_name="l2_drops" + local mz_pid + + # Make sure packets can only egress the input port. + ip link set dev $swp2 type bridge_slave flood off + + RET=0 + + tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \ + flower dst_mac $dmac action drop + + $MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $swp2 + + # Allow packets to be flooded. + ip link set dev $swp2 type bridge_slave flood on + devlink_trap_action_set $trap_name "trap" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle when packets should not be dropped" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with when packets should not be dropped" + + tc_check_packets "dev $swp2 egress" 101 0 + check_fail $? 
"Packets not forwarded when should" + + devlink_trap_action_set $trap_name "drop" + + log_test "Port loopback filter - unicast" + + devlink_trap_drop_cleanup $mz_pid $swp2 ip +} + +port_loopback_filter_test() +{ + port_loopback_filter_uc_test +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh new file mode 100755 index 000000000000..d88d8e47d11b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -0,0 +1,675 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap L3 drops functionality over mlxsw. Each registered L3 drop +# packet trap is tested to make sure it is triggered under the right +# conditions. + +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + non_ip_test + uc_dip_over_mc_dmac_test + dip_is_loopback_test + sip_is_mc_test + sip_is_loopback_test + ip_header_corrupted_test + ipv4_sip_is_limited_bc_test + ipv6_mc_dip_reserved_scope_test + ipv6_mc_dip_interface_local_scope_test + blackhole_route_test + irif_disabled_test + 
erif_disabled_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 $h2_ipv4/24 $h2_ipv6/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + simple_if_fini $h2 $h2_ipv4/24 $h2_ipv6/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp2 clsact +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h1mac=$(mac_get $h1) + rp1mac=$(mac_get $rp1) + + h1_ipv4=192.0.2.1 + h2_ipv4=198.51.100.1 + h1_ipv6=2001:db8:1::1 + h2_ipv6=2001:db8:2::1 + + vrf_prepare + forwarding_enable + + h1_create + h2_create + + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +ping_check() +{ + trap_name=$1; shift + + devlink_trap_action_set $trap_name "trap" + ping_do $h1 $h2_ipv4 + check_err $? 
"Packets that should not be trapped were trapped" + devlink_trap_action_set $trap_name "drop" +} + +non_ip_test() +{ + local trap_name="non_ip" + local group_name="l3_drops" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $h2_ipv4 action drop + + # Generate non-IP packets to the router + $MZ $h1 -c 0 -p 100 -d 1msec -B $h2_ipv4 -q "$rp1mac $h1mac \ + 00:00 de:ad:be:ef" & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "Non IP" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" +} + +__uc_dip_over_mc_dmac_test() +{ + local desc=$1; shift + local proto=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="uc_dip_over_mc_dmac" + local group_name="l3_drops" + local dmac=01:02:03:04:05:06 + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower ip_proto udp src_port 54321 dst_port 12345 action drop + + # Generate IP packets with a unicast IP and a multicast destination MAC + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $dmac \ + -B $dip -d 1msec -q & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "Unicast destination IP over multicast destination MAC: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto +} + +uc_dip_over_mc_dmac_test() +{ + __uc_dip_over_mc_dmac_test "IPv4" "ip" $h2_ipv4 + __uc_dip_over_mc_dmac_test "IPv6" "ipv6" $h2_ipv6 "-6" +} + +__sip_is_loopback_test() +{ + local desc=$1; shift + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="sip_is_loopback_address" + local group_name="l3_drops" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower src_ip $sip action drop + + # Generate packets with loopback source IP + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \ + -b $rp1mac -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "Source IP is loopback address: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto +} + +sip_is_loopback_test() +{ + __sip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" $h2_ipv4 + __sip_is_loopback_test "IPv6" "ipv6" "::1" $h2_ipv6 "-6" +} + +__dip_is_loopback_test() +{ + local desc=$1; shift + local proto=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="dip_is_loopback_address" + local group_name="l3_drops" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower dst_ip $dip action drop + + # Generate packets with loopback destination IP + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \ + -B $dip -d 1msec -q & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "Destination IP is loopback address: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto +} + +dip_is_loopback_test() +{ + __dip_is_loopback_test "IPv4" "ip" "127.0.0.0/8" + __dip_is_loopback_test "IPv6" "ipv6" "::1" "-6" +} + +__sip_is_mc_test() +{ + local desc=$1; shift + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local trap_name="sip_is_mc" + local group_name="l3_drops" + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower src_ip $sip action drop + + # Generate packets with multicast source IP + $MZ $h1 $flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip \ + -b $rp1mac -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "Source IP is multicast: $desc" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto +} + +sip_is_mc_test() +{ + __sip_is_mc_test "IPv4" "ip" "239.1.1.1" $h2_ipv4 + __sip_is_mc_test "IPv6" "ipv6" "FF02::2" $h2_ipv6 "-6" +} + +ipv4_sip_is_limited_bc_test() +{ + local trap_name="ipv4_sip_is_limited_bc" + local group_name="l3_drops" + local sip=255.255.255.255 + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower src_ip $sip action drop + + # Generate packets with limited broadcast source IP + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -A $sip -b $rp1mac \ + -B $h2_ipv4 -d 1msec -q & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "IPv4 source IP is limited broadcast" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" +} + +ipv4_payload_get() +{ + local ipver=$1; shift + local ihl=$1; shift + local checksum=$1; shift + + p=$(: + )"08:00:"$( : ETH type + )"$ipver"$( : IP version + )"$ihl:"$( : IHL + )"00:"$( : IP TOS + )"00:F4:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"$checksum:"$( : IP header csum + )"$h1_ipv4:"$( : IP saddr + )"$h2_ipv4:"$( : IP daddr + ) + echo $p +} + +__ipv4_header_corrupted_test() +{ + local desc=$1; shift + local ipver=$1; shift + local ihl=$1; shift + local checksum=$1; shift + local trap_name="ip_header_corrupted" + local group_name="l3_drops" + local payload + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $h2_ipv4 action drop + + payload=$(ipv4_payload_get $ipver $ihl $checksum) + + # Generate packets with corrupted IP header + $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "IP header corrupted: $desc: IPv4" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" +} + +ipv6_payload_get() +{ + local ipver=$1; shift + + p=$(: + )"86:DD:"$( : ETH type + )"$ipver"$( : IP version + )"0:0:"$( : Traffic class + )"0:00:00:"$( : Flow label + )"00:00:"$( : Payload length + )"01:"$( : Next header + )"04:"$( : Hop limit + )"$h1_ipv6:"$( : IP saddr + )"$h2_ipv6:"$( : IP daddr + ) + echo $p +} + +__ipv6_header_corrupted_test() +{ + local desc=$1; shift + local ipver=$1; shift + local trap_name="ip_header_corrupted" + local group_name="l3_drops" + local payload + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $h2_ipv4 action drop + + payload=$(ipv6_payload_get $ipver) + + # Generate packets with corrupted IP header + $MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "IP header corrupted: $desc: IPv6" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ip" +} + +ip_header_corrupted_test() +{ + # Each test uses one wrong value. The three values below are correct. + local ipv="4" + local ihl="5" + local checksum="00:F4" + + __ipv4_header_corrupted_test "wrong IP version" 5 $ihl $checksum + __ipv4_header_corrupted_test "wrong IHL" $ipv 4 $checksum + __ipv4_header_corrupted_test "wrong checksum" $ipv $ihl "00:00" + __ipv6_header_corrupted_test "wrong IP version" 5 +} + +ipv6_mc_dip_reserved_scope_test() +{ + local trap_name="ipv6_mc_dip_reserved_scope" + local group_name="l3_drops" + local dip=FF00:: + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \ + flower dst_ip $dip action drop + + # Generate packets with reserved scope destination IP + $MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \ + "33:33:00:00:00:00" -B $dip -d 1msec -q & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "IPv6 multicast destination IP reserved scope" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" +} + +ipv6_mc_dip_interface_local_scope_test() +{ + local trap_name="ipv6_mc_dip_interface_local_scope" + local group_name="l3_drops" + local dip=FF01:: + local mz_pid + + RET=0 + + ping_check $trap_name + + tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \ + flower dst_ip $dip action drop + + # Generate packets with interface local scope destination IP + $MZ $h1 -6 -t udp "sp=54321,dp=12345" -c 0 -p 100 -b \ + "33:33:00:00:00:00" -B $dip -d 1msec -q & + mz_pid=$! + + devlink_trap_drop_test $trap_name $group_name $rp2 + + log_test "IPv6 multicast destination IP interface-local scope" + + devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" +} + +__blackhole_route_test() +{ + local flags=$1; shift + local subnet=$1; shift + local proto=$1; shift + local dip=$1; shift + local ip_proto=${1:-"icmp"}; shift + local trap_name="blackhole_route" + local group_name="l3_drops" + local mz_pid + + RET=0 + + ping_check $trap_name + + ip -$flags route add blackhole $subnet + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower skip_hw dst_ip $dip ip_proto $ip_proto action drop + + # Generate packets to the blackhole route + $MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \ + -B $dip -d 1msec -q & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $rp2 + log_test "Blackhole route: IPv$flags" + + devlink_trap_drop_cleanup $mz_pid $rp2 $proto + ip -$flags route del blackhole $subnet +} + +blackhole_route_test() +{ + __blackhole_route_test "4" "198.51.100.0/30" "ip" $h2_ipv4 + __blackhole_route_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6 "icmpv6" +} + +irif_disabled_test() +{ + local trap_name="irif_disabled" + local group_name="l3_drops" + local t0_packets t0_bytes + local t1_packets t1_bytes + local mz_pid + + RET=0 + + ping_check $trap_name + + devlink_trap_action_set $trap_name "trap" + + # When RIF of a physical port ("Sub-port RIF") is destroyed, we first + # block the STP of the {Port, VLAN} so packets cannot get into the RIF. + # Using bridge enables us to see this trap because when bridge is + # destroyed, there is a small time window that packets can go into the + # RIF, while it is disabled. + ip link add dev br0 type bridge + ip link set dev $rp1 master br0 + ip address flush dev $rp1 + __addr_add_del br0 add 192.0.2.2/24 + ip li set dev br0 up + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + # Generate packets to h2 through br0 RIF that will be removed later + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp1mac \ + -B $h2_ipv4 -q & + mz_pid=$! + + # Wait before removing br0 RIF to allow packets to go into the bridge. 
+ sleep 1 + + # Flushing address will dismantle the RIF + ip address flush dev br0 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + check_err 1 "Trap stats idle when packets should be trapped" + fi + + log_test "Ingress RIF disabled" + + kill $mz_pid && wait $mz_pid &> /dev/null + ip link set dev $rp1 nomaster + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + ip link del dev br0 type bridge + devlink_trap_action_set $trap_name "drop" +} + +erif_disabled_test() +{ + local trap_name="erif_disabled" + local group_name="l3_drops" + local t0_packets t0_bytes + local t1_packets t1_bytes + local mz_pid + + RET=0 + + ping_check $trap_name + + devlink_trap_action_set $trap_name "trap" + ip link add dev br0 type bridge + ip add flush dev $rp1 + ip link set dev $rp1 master br0 + __addr_add_del br0 add 192.0.2.2/24 + ip link set dev br0 up + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + rp2mac=$(mac_get $rp2) + + # Generate packets that should go out through br0 RIF that will be + # removed later + $MZ $h2 -t udp "sp=54321,dp=12345" -c 0 -p 100 -a own -b $rp2mac \ + -B 192.0.2.1 -q & + mz_pid=$! + + sleep 5 + # In order to see this trap we need a route that points to disabled RIF. + # When ipv6 address is flushed, there is a delay and the routes are + # deleted before the RIF and we cannot get state that we have route + # to disabled RIF. + # Delete IPv6 address first and then check this trap with flushing IPv4. 
+ ip -6 add flush dev br0 + ip -4 add flush dev br0 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + check_err 1 "Trap stats idle when packets should be trapped" + fi + + log_test "Egress RIF disabled" + + kill $mz_pid && wait $mz_pid &> /dev/null + ip link set dev $rp1 nomaster + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + ip link del dev br0 type bridge + devlink_trap_action_set $trap_name "drop" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh new file mode 100755 index 000000000000..2bc6df42d597 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -0,0 +1,557 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap L3 exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. 
+ +# +---------------------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/24 | +# | | 2001:db8:1::1/64 | +# | | | +# | | default via 192.0.2.2 | +# | | default via 2001:db8:1::2 | +# +----|----------------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | + $rp1 | +# | 192.0.2.2/24 | +# | 2001:db8:1::2/64 | +# | | +# | 2001:db8:2::2/64 | +# | 198.51.100.2/24 | +# | + $rp2 | +# | | | +# +----|----------------------------------------------------------------------+ +# | +# +----|----------------------------+ +# | | default via 198.51.100.2 | +# | | default via 2001:db8:2::2 | +# | | | +# | | 2001:db8:2::1/64 | +# | | 198.51.100.1/24 | +# | + $h2 | +# | H2 (vrf) | +# +---------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + mtu_value_is_too_small_test + ttl_value_is_too_small_test + mc_reverse_path_forwarding_test + reject_route_test + unresolved_neigh_test + ipv4_lpm_miss_test + ipv6_lpm_miss_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 + + ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2 + + tc qdisc add dev $h1 clsact +} + +h1_destroy() +{ + tc qdisc del dev $h1 clsact + + ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2 + ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 + + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64 + + ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 + ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2 +} + +h2_destroy() +{ + ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2 + ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 + + 
simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64 +} + +router_create() +{ + ip link set dev $rp1 up + ip link set dev $rp2 up + + tc qdisc add dev $rp2 clsact + + __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 + __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64 +} + +router_destroy() +{ + __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64 + __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64 + + tc qdisc del dev $rp2 clsact +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rp1=${NETIFS[p2]} + + rp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + rp1mac=$(mac_get $rp1) + + start_mcd + + vrf_prepare + forwarding_enable + + h1_create + h2_create + + router_create +} + +cleanup() +{ + pre_cleanup + + router_destroy + + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup + + kill_mcd +} + +ping_check() +{ + ping_do $h1 198.51.100.1 + check_err $? "Packets that should not be trapped were trapped" +} + +trap_action_check() +{ + local trap_name=$1; shift + local expected_action=$1; shift + + action=$(devlink_trap_action_get $trap_name) + if [ "$action" != $expected_action ]; then + check_err 1 "Trap $trap_name has wrong action: $action" + fi +} + +mtu_value_is_too_small_test() +{ + local trap_name="mtu_value_is_too_small" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # type - Destination Unreachable + # code - Fragmentation Needed and Don't Fragment was Set + tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \ + flower skip_hw ip_proto icmp type 3 code 4 action pass + + mtu_set $rp2 1300 + + # Generate IP packets bigger than router's MTU with don't fragment + # flag on. + $MZ $h1 -t udp "sp=54321,dp=12345,df" -p 1400 -c 0 -d 1msec -b $rp1mac \ + -B 198.51.100.1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets_hitting "dev $h1 ingress" 101 + check_err $? 
"Packets were not received to h1" + + log_test "MTU value is too small" + + mtu_restore $rp2 + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower +} + +__ttl_value_is_too_small_test() +{ + local ttl_val=$1; shift + local trap_name="ttl_value_is_too_small" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # type - Time Exceeded + # code - Time to Live exceeded in Transit + tc filter add dev $h1 ingress protocol ip pref 1 handle 101 \ + flower skip_hw ip_proto icmp type 11 code 0 action pass + + # Generate IP packets with small TTL + $MZ $h1 -t udp "ttl=$ttl_val,sp=54321,dp=12345" -c 0 -d 1msec \ + -b $rp1mac -B 198.51.100.1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets_hitting "dev $h1 ingress" 101 + check_err $? "Packets were not received to h1" + + log_test "TTL value is too small: TTL=$ttl_val" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower +} + +ttl_value_is_too_small_test() +{ + __ttl_value_is_too_small_test 0 + __ttl_value_is_too_small_test 1 +} + +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + +__mc_reverse_path_forwarding_test() +{ + local desc=$1; shift + local src_ip=$1; shift + local dst_ip=$1; shift + local dst_mac=$1; shift + local proto=$1; shift + local flags=${1:-""}; shift + local trap_name="mc_reverse_path_forwarding" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + tc filter 
add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower dst_ip $dst_ip ip_proto udp action drop + + $MC_CLI -I $table_name add $rp1 $src_ip $dst_ip $rp2 + + # Generate packets to multicast address. + $MZ $h2 $flags -t udp "sp=54321,dp=12345" -c 0 -p 128 \ + -a 00:11:22:33:44:55 -b $dst_mac \ + -A $src_ip -B $dst_ip -q & + + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $rp2 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "Multicast reverse path forwarding: $desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower +} + +mc_reverse_path_forwarding_test() +{ + __mc_reverse_path_forwarding_test "IPv4" "192.0.2.1" "225.1.2.3" \ + "01:00:5e:01:02:03" "ip" + __mc_reverse_path_forwarding_test "IPv6" "2001:db8:1::1" "ff0e::3" \ + "33:33:00:00:00:03" "ipv6" "-6" +} + +__reject_route_test() +{ + local desc=$1; shift + local dst_ip=$1; shift + local proto=$1; shift + local ip_proto=$1; shift + local type=$1; shift + local code=$1; shift + local unreachable=$1; shift + local flags=${1:-""}; shift + local trap_name="reject_route" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + tc filter add dev $h1 ingress protocol $proto pref 1 handle 101 flower \ + skip_hw ip_proto $ip_proto type $type code $code action pass + + ip route add unreachable $unreachable + + # Generate packets to h2. The destination IP is unreachable. + $MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \ + -B $dst_ip -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets_hitting "dev $h1 ingress" 101 + check_err $?
"ICMP packet was not received to h1" + + log_test "Reject route: $desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + ip route del unreachable $unreachable + tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower +} + +reject_route_test() +{ + # type - Destination Unreachable + # code - Host Unreachable + __reject_route_test "IPv4" 198.51.100.1 "ip" "icmp" 3 1 \ + "198.51.100.0/26" + # type - Destination Unreachable + # code - No Route + __reject_route_test "IPv6" 2001:db8:2::1 "ipv6" "icmpv6" 1 0 \ + "2001:db8:2::0/66" "-6" +} + +__host_miss_test() +{ + local desc=$1; shift + local dip=$1; shift + local trap_name="unresolved_neigh" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + ip neigh flush dev $rp2 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + + # Generate packets to h2 (will incur a unresolved neighbor). + # The ping should pass and devlink counters should be increased. + ping_do $h1 $dip + check_err $? "ping failed: $desc" + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + log_test "Unresolved neigh: host miss: $desc" +} + +__invalid_nexthop_test() +{ + local desc=$1; shift + local dip=$1; shift + local extra_add=$1; shift + local subnet=$1; shift + local via_add=$1; shift + local trap_name="unresolved_neigh" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + ip address add $extra_add/$subnet dev $h2 + + # Check that correct route does not trigger unresolved_neigh + ip $flags route add $dip via $extra_add dev $rp2 + + # Generate packets in order to discover all neighbours. 
+ # Without it, counters of unresolved_neigh will be increased + # during neighbours discovery and the check below will fail + # for a wrong reason + ping_do $h1 $dip + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -ne $t1_packets ]]; then + check_err 1 "Trap counter increased when it should not" + fi + + ip $flags route del $dip via $extra_add dev $rp2 + + # Check that route to nexthop that does not exist triggers + # unresolved_neigh + ip $flags route add $dip via $via_add dev $h2 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + ip $flags route del $dip via $via_add dev $h2 + ip address del $extra_add/$subnet dev $h2 + log_test "Unresolved neigh: nexthop does not exist: $desc" +} + +unresolved_neigh_test() +{ + __host_miss_test "IPv4" 198.51.100.1 + __host_miss_test "IPv6" 2001:db8:2::1 + __invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4 + __invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \ + 2001:db8:2::4 +} + +vrf_without_routes_create() +{ + # VRF creating makes the links to be down and then up again. + # By default, IPv6 address is not saved after link becomes down. + # Save IPv6 address using sysctl configuration.
+ sysctl_set net.ipv6.conf.$rp1.keep_addr_on_down 1 + sysctl_set net.ipv6.conf.$rp2.keep_addr_on_down 1 + + ip link add dev vrf1 type vrf table 101 + ip link set dev $rp1 master vrf1 + ip link set dev $rp2 master vrf1 + ip link set dev vrf1 up + + # Wait for rp1 and rp2 to be up + setup_wait +} + +vrf_without_routes_destroy() +{ + ip link set dev $rp1 nomaster + ip link set dev $rp2 nomaster + ip link del dev vrf1 + + sysctl_restore net.ipv6.conf.$rp2.keep_addr_on_down + sysctl_restore net.ipv6.conf.$rp1.keep_addr_on_down + + # Wait for interfaces to be up + setup_wait +} + +ipv4_lpm_miss_test() +{ + local trap_name="ipv4_lpm_miss" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # Create a VRF without a default route + vrf_without_routes_create + + # Generate packets through a VRF without a matching route. + $MZ $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \ + -B 203.0.113.1 -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + log_test "LPM miss: IPv4" + + kill $mz_pid && wait $mz_pid &> /dev/null + vrf_without_routes_destroy +} + +ipv6_lpm_miss_test() +{ + local trap_name="ipv6_lpm_miss" + local group_name="l3_drops" + local expected_action="trap" + local mz_pid + + RET=0 + + ping_check $trap_name + trap_action_check $trap_name $expected_action + + # Create a VRF without a default route + vrf_without_routes_create + + # Generate packets through a VRF without a matching route. + $MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 0 -d 1msec -b $rp1mac \ + -B 2001:db8::1 -q & + mz_pid=$! 
+ + devlink_trap_exception_test $trap_name $group_name + + log_test "LPM miss: IPv6" + + kill $mz_pid && wait $mz_pid &> /dev/null + vrf_without_routes_destroy +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh new file mode 100755 index 000000000000..039629bb92a3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -0,0 +1,265 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel exceptions functionality over mlxsw. +# Check all exception traps to make sure they are triggered under the right +# conditions. + +# +-------------------------+ +# | H1 | +# | $h1 + | +# | 192.0.2.1/28 | | +# +-------------------|-----+ +# | +# +-------------------|-----+ +# | SW1 | | +# | $swp1 + | +# | 192.0.2.2/28 | +# | | +# | + g1a (gre) | +# | loc=192.0.2.65 | +# | rem=192.0.2.66 | +# | tos=inherit | +# | | +# | + $rp1 | +# | | 198.51.100.1/28 | +# +--|----------------------+ +# | +# +--|----------------------+ +# | | VRF2 | +# | + $rp2 | +# | 198.51.100.2/28 | +# +-------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +vrf2_create() +{ + simple_if_init $rp2 198.51.100.2/28 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 198.51.100.2/28 +} + +switch_create() +{ + __addr_add_del $swp1 add 192.0.2.2/28 + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 up + + tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit + __addr_add_del g1 add 192.0.2.65/32 + ip link set dev g1 up + + __addr_add_del $rp1 add 198.51.100.1/28 + ip link set dev $rp1 up +} + 
+switch_destroy() +{ + ip link set dev $rp1 down + __addr_add_del $rp1 del 198.51.100.1/28 + + ip link set dev g1 down + __addr_add_del g1 del 192.0.2.65/32 + tunnel_destroy g1 + + ip link set dev $swp1 down + tc qdisc del dev $swp1 clsact + __addr_add_del $swp1 del 192.0.2.2/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + forwarding_enable + vrf_prepare + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + vrf_cleanup + forwarding_restore +} + +ecn_payload_get() +{ + p=$(: + )"0"$( : GRE flags + )"0:00:"$( : Reserved + version + )"08:00:"$( : ETH protocol type + )"4"$( : IP version + )"5:"$( : IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"E7:E6:"$( : IP header csum + )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 + )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A 192.0.2.66 -B 192.0.2.65 -t ip \ + len=48,tos=$outer_tos,proto=47,p=$payload -q & + + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? 
"Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +ipip_payload_get() +{ + local flags=$1; shift + local key=$1; shift + + p=$(: + )"$flags"$( : GRE flags + )"0:00:"$( : Reserved + version + )"08:00:"$( : ETH protocol type + )"$key"$( : Key + )"4"$( : IP version + )"5:"$( : IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"30:"$( : IP TTL + )"01:"$( : IP proto + )"E7:E6:"$( : IP header csum + )"C0:00:01:01:"$( : IP saddr : 192.0.1.1 + )"C0:00:02:01:"$( : IP daddr : 192.0.2.1 + ) + echo $p +} + +no_matching_tunnel_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local sip=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_ip 192.0.1.1 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + rp2_mac=$(mac_get $rp2) + payload=$(ipip_payload_get "$@") + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \ + -A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? 
"Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +decap_error_test() +{ + # Correct source IP - the remote address + local sip=192.0.2.66 + + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + no_matching_tunnel_test "Decap error: Source IP check failed" \ + 192.0.2.68 "0" + no_matching_tunnel_test \ + "Decap error: Key exists but was not expected" $sip "2" ":E9:" + + # Destroy the tunnel and create new one with key + __addr_add_del g1 del 192.0.2.65/32 + tunnel_destroy g1 + + tunnel_create g1 gre 192.0.2.65 192.0.2.66 tos inherit key 233 + __addr_add_del g1 add 192.0.2.65/32 + + no_matching_tunnel_test \ + "Decap error: Key does not exist but was expected" $sip "0" + no_matching_tunnel_test \ + "Decap error: Packet has a wrong key field" $sip "2" "E8:" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh new file mode 100755 index 000000000000..fd19161dd4ec --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -0,0 +1,330 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test devlink-trap tunnel drops and exceptions functionality over mlxsw. +# Check all traps to make sure they are triggered under the right +# conditions. 
+ +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | VRF2 | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + decap_error_test + overlay_smac_is_mc_test +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. 
+ ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link add name vx1 type vxlan id 1000 local 192.0.2.17 \ + dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx1 master br1 + ip link set dev vx1 up + + ip address add dev $rp1 192.0.2.17/28 + ip link set dev $rp1 up +} + +switch_destroy() +{ + ip link set dev $rp1 down + ip address del dev $rp1 192.0.2.17/28 + + ip link set dev vx1 down + ip link set dev vx1 nomaster + ip link del dev vx1 + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link del dev br1 +} + +vrf2_create() +{ + simple_if_init $rp2 192.0.2.18/28 +} + +vrf2_destroy() +{ + simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + h1_create + switch_create + vrf2_create +} + +cleanup() +{ + pre_cleanup + + vrf2_destroy + switch_destroy + h1_destroy + forwarding_restore + vrf_cleanup +} + +ecn_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"D6:E5:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +ecn_decap_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local ecn_desc=$1; shift + local outer_tos=$1; shift + local mz_pid + + RET=0 + + tc filter add dev $swp1 egress protocol 
ip pref 1 handle 101 \ + flower src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(ecn_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac -B 192.0.2.17 \ + -t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q & + mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +reserved_bits_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"00:00:00:00:00:00:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +short_payload_get() +{ + dest_mac=$(mac_get $h1) + p=$(: + )"08:"$( : VXLAN flags + )"01:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + ) + echo $p +} + +corrupted_packet_test() +{ + local trap_name="decap_error" + local group_name="tunnel_drops" + local desc=$1; shift + local payload_get=$1; shift + local mz_pid + + RET=0 + + # In case of a too short packet, there is no inner packet, + # so the matching will always succeed + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_hw src_ip 192.0.2.3 dst_ip 192.0.2.1 action pass + + rp1_mac=$(mac_get $rp1) + payload=$($payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & +
mz_pid=$! + + devlink_trap_exception_test $trap_name $group_name + + tc_check_packets "dev $swp1 egress" 101 0 + check_err $? "Packets were not dropped" + + log_test "$desc" + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower +} + +decap_error_test() +{ + ecn_decap_test "Decap error" "ECT(1)" 01 + ecn_decap_test "Decap error" "ECT(0)" 02 + ecn_decap_test "Decap error" "CE" 03 + + corrupted_packet_test "Decap error: Reserved bits in use" \ + "reserved_bits_payload_get" + corrupted_packet_test "Decap error: No L2 header" "short_payload_get" +} + +mc_smac_payload_get() +{ + dest_mac=$(mac_get $h1) + source_mac=01:02:03:04:05:06 + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"00:03:e8:"$( : VXLAN VNI : 1000 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$source_mac:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:14:"$( : IP total length + )"00:00:"$( : IP identification + )"20:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"00:"$( : IP proto + )"00:00:"$( : IP header csum + )"c0:00:02:03:"$( : IP saddr: 192.0.2.3 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +overlay_smac_is_mc_test() +{ + local trap_name="overlay_smac_is_mc" + local group_name="tunnel_drops" + local mz_pid + + RET=0 + + # The matching will be checked on devlink_trap_drop_test() + # and the filter will be removed on devlink_trap_drop_cleanup() + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower src_mac 01:02:03:04:05:06 action pass + + rp1_mac=$(mac_get $rp1) + payload=$(mc_smac_payload_get) + + ip vrf exec v$rp2 $MZ $rp2 -c 0 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q & + mz_pid=$! 
+ + devlink_trap_drop_test $trap_name $group_name $swp1 + + log_test "Overlay source MAC is multicast" + + devlink_trap_drop_cleanup $mz_pid $swp1 "ip" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh new file mode 100755 index 000000000000..eab79b9e58cd --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the FIB offload API on top of mlxsw. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_identical_routes + ipv4_tos + ipv4_metric + ipv4_replace + ipv4_delete + ipv4_plen + ipv4_replay + ipv4_flush + ipv4_local_replace + ipv6_add + ipv6_metric + ipv6_append_single + ipv6_replace_single + ipv6_metric_multipath + ipv6_append_multipath + ipv6_replace_multipath + ipv6_append_multipath_to_single + ipv6_delete_single + ipv6_delete_multipath + ipv6_replay_single + ipv6_replay_multipath + ipv6_local_replace +" +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source $lib_dir/fib_offload_lib.sh + +ipv4_identical_routes() +{ + fib_ipv4_identical_routes_test "testns1" +} + +ipv4_tos() +{ + fib_ipv4_tos_test "testns1" +} + +ipv4_metric() +{ + fib_ipv4_metric_test "testns1" +} + +ipv4_replace() +{ + fib_ipv4_replace_test "testns1" +} + +ipv4_delete() +{ + fib_ipv4_delete_test "testns1" +} + +ipv4_plen() +{ + fib_ipv4_plen_test "testns1" +} + +ipv4_replay_metric() +{ + fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_tos() +{ + fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_plen() +{ + fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay() +{ + ipv4_replay_metric + ipv4_replay_tos + ipv4_replay_plen +} + +ipv4_flush() +{ + fib_ipv4_flush_test "testns1" +} + +ipv4_local_replace() +{ + local ns="testns1" + + RET=0 + + ip 
-n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add table local 192.0.2.1/32 dev dummy1 + fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false + check_err $? "Local table route not in hardware when should" + + ip -n $ns route add table main 192.0.2.1/32 dev dummy1 + fib4_trap_check $ns "table main 192.0.2.1/32 dev dummy1" true + check_err $? "Main table route in hardware when should not" + + fib4_trap_check $ns "table local 192.0.2.1/32 dev dummy1" false + check_err $? "Local table route was replaced when should not" + + # Test that local routes can replace routes in main table. + ip -n $ns route add table main 192.0.2.2/32 dev dummy1 + fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" false + check_err $? "Main table route not in hardware when should" + + ip -n $ns route add table local 192.0.2.2/32 dev dummy1 + fib4_trap_check $ns "table local 192.0.2.2/32 dev dummy1" false + check_err $? "Local table route did not replace route in main table when should" + + fib4_trap_check $ns "table main 192.0.2.2/32 dev dummy1" true + check_err $? 
"Main table route was not replaced when should" + + log_test "IPv4 local table route replacement" + + ip -n $ns link del dev dummy1 +} + +ipv6_add() +{ + fib_ipv6_add_test "testns1" +} + +ipv6_metric() +{ + fib_ipv6_metric_test "testns1" +} + +ipv6_append_single() +{ + fib_ipv6_append_single_test "testns1" +} + +ipv6_replace_single() +{ + fib_ipv6_replace_single_test "testns1" +} + +ipv6_metric_multipath() +{ + fib_ipv6_metric_multipath_test "testns1" +} + +ipv6_append_multipath() +{ + fib_ipv6_append_multipath_test "testns1" +} + +ipv6_replace_multipath() +{ + fib_ipv6_replace_multipath_test "testns1" +} + +ipv6_append_multipath_to_single() +{ + fib_ipv6_append_multipath_to_single_test "testns1" +} + +ipv6_delete_single() +{ + fib_ipv6_delete_single_test "testns1" +} + +ipv6_delete_multipath() +{ + fib_ipv6_delete_multipath_test "testns1" +} + +ipv6_replay_single() +{ + fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV" +} + +ipv6_replay_multipath() +{ + fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV" +} + +ipv6_local_replace() +{ + local ns="testns1" + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add table local 2001:db8:1::1/128 dev dummy1 + fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false + check_err $? "Local table route not in hardware when should" + + ip -n $ns route add table main 2001:db8:1::1/128 dev dummy1 + fib6_trap_check $ns "table main 2001:db8:1::1/128 dev dummy1" true + check_err $? "Main table route in hardware when should not" + + fib6_trap_check $ns "table local 2001:db8:1::1/128 dev dummy1" false + check_err $? "Local table route was replaced when should not" + + # Test that local routes can replace routes in main table. + ip -n $ns route add table main 2001:db8:1::2/128 dev dummy1 + fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" false + check_err $? 
"Main table route not in hardware when should" + + ip -n $ns route add table local 2001:db8:1::2/128 dev dummy1 + fib6_trap_check $ns "table local 2001:db8:1::2/128 dev dummy1" false + check_err $? "Local route route did not replace route in main table when should" + + fib6_trap_check $ns "table main 2001:db8:1::2/128 dev dummy1" true + check_err $? "Main table route was not replaced when should" + + log_test "IPv6 local table route replacement" + + ip -n $ns link del dev dummy1 +} + +setup_prepare() +{ + ip netns add testns1 + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + if [ $? -ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi +} + +cleanup() +{ + pre_cleanup + devlink -N testns1 dev reload $DEVLINK_DEV netns $$ + ip netns del testns1 +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh new file mode 100755 index 000000000000..eff6393ce974 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh @@ -0,0 +1,176 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test for port-default priority. Non-IP packets ingress $swp1 and are +# prioritized according to the default priority specified at the port. +# rx_octets_prio_* counters are used to verify the prioritization. 
+# +# +-----------------------+ +# | H1 | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|------------------+ +# | +# +----|------------------+ +# | SW | | +# | + $swp1 | +# | 192.0.2.2/28 | +# | APP=<prio>,1,0 | +# +-----------------------+ + +ALL_TESTS=" + ping_ipv4 + test_defprio +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=2 +: ${HIT_TIMEOUT:=1000} # ms +source $lib_dir/lib.sh + +declare -a APP + +defprio_install() +{ + local dev=$1; shift + local prio=$1; shift + local app="app=$prio,1,0" + + lldptool -T -i $dev -V APP $app >/dev/null + lldpad_app_wait_set $dev + APP[$prio]=$app +} + +defprio_uninstall() +{ + local dev=$1; shift + local prio=$1; shift + local app=${APP[$prio]} + + lldptool -T -i $dev -V APP -d $app >/dev/null + lldpad_app_wait_del + unset APP[$prio] +} + +defprio_flush() +{ + local dev=$1; shift + local prio + + if ((${#APP[@]})); then + lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null + fi + lldpad_app_wait_del + APP=() +} + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link set dev $swp1 up + ip addr add dev $swp1 192.0.2.2/28 +} + +switch_destroy() +{ + defprio_flush $swp1 + ip addr del dev $swp1 192.0.2.2/28 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +wait_for_packets() +{ + local t0=$1; shift + local prio_observe=$1; shift + + local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) + local delta=$((t1 - t0)) + echo $delta + ((delta >= 10)) +} + +__test_defprio() +{ + local prio_install=$1; shift + local prio_observe=$1; shift + local delta + local key + local i + + RET=0 + + defprio_install $swp1 $prio_install + + local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe) + mausezahn -q $h1 
-d 100m -c 10 -t arp reply + delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe) + + check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta." + log_test "Default priority $prio_install/$prio_observe" + + defprio_uninstall $swp1 $prio_install +} + +test_defprio() +{ + local prio + + for prio in {0..7}; do + __test_defprio $prio $prio + done + + defprio_install $swp1 3 + __test_defprio 0 3 + __test_defprio 1 3 + __test_defprio 2 3 + __test_defprio 4 4 + __test_defprio 5 5 + __test_defprio 6 6 + __test_defprio 7 7 + defprio_uninstall $swp1 3 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh index 40f16f2a3afd..5cbff8038f84 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -36,8 +36,6 @@ source $lib_dir/lib.sh h1_create() { - local dscp; - simple_if_init $h1 192.0.2.1/28 tc qdisc add dev $h1 clsact dscp_capture_install $h1 10 @@ -67,6 +65,7 @@ h2_destroy() dscp_map() { local base=$1; shift + local prio for prio in {0..7}; do echo app=$prio,5,$((base + prio)) @@ -138,6 +137,7 @@ dscp_ping_test() local prio=$1; shift local dev_10=$1; shift local dev_20=$1; shift + local key local dscp_10=$(((prio + 10) << 2)) local dscp_20=$(((prio + 20) << 2)) @@ -175,6 +175,8 @@ dscp_ping_test() test_dscp() { + local prio + for prio in {0..7}; do dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2 done diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh index 9faf02e32627..c745ce3befee 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh @@ -31,6 +31,7 @@ ALL_TESTS=" ping_ipv4 test_update 
test_no_update + test_dscp_leftover " lib_dir=$(dirname $0)/../../../net/forwarding @@ -50,10 +51,13 @@ reprioritize() echo ${reprio[$in]} } -h1_create() +zero() { - local dscp; + echo 0 +} +h1_create() +{ simple_if_init $h1 192.0.2.1/28 tc qdisc add dev $h1 clsact dscp_capture_install $h1 0 @@ -87,6 +91,7 @@ h2_destroy() dscp_map() { local base=$1; shift + local prio for prio in {0..7}; do echo app=$prio,5,$((base + prio)) @@ -156,6 +161,7 @@ dscp_ping_test() local reprio=$1; shift local dev1=$1; shift local dev2=$1; shift + local i local prio2=$($reprio $prio) # ICMP Request egress prio local prio3=$($reprio $prio2) # ICMP Response egress prio @@ -205,6 +211,7 @@ __test_update() { local update=$1; shift local reprio=$1; shift + local prio sysctl_restore net.ipv4.ip_forward_update_priority sysctl_set net.ipv4.ip_forward_update_priority $update @@ -224,6 +231,19 @@ test_no_update() __test_update 0 echo } +# Test that when the last APP rule is removed, the prio->DSCP map is properly +# set to zeroes, and that the last APP rule does not stay active in the ASIC. 
+test_dscp_leftover() +{ + lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null + lldpad_app_wait_del + + __test_update 0 zero + + lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null + lldpad_app_wait_set $swp2 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index e80be65799ad..faa51012cdac 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -1,47 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -humanize() -{ - local speed=$1; shift - - for unit in bps Kbps Mbps Gbps; do - if (($(echo "$speed < 1024" | bc))); then - break - fi - - speed=$(echo "scale=1; $speed / 1024" | bc) - done - - echo "$speed${unit}" -} - -rate() -{ - local t0=$1; shift - local t1=$1; shift - local interval=$1; shift - - echo $((8 * (t1 - t0) / interval)) -} - -start_traffic() -{ - local h_in=$1; shift # Where the traffic egresses the host - local sip=$1; shift - local dip=$1; shift - local dmac=$1; shift - - $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ - -a own -b $dmac -t udp -q & - sleep 1 -} - -stop_traffic() -{ - # Suppress noise from killing mausezahn. - { kill %% && wait %%; } 2>/dev/null -} - check_rate() { local rate=$1; shift @@ -96,3 +54,31 @@ measure_rate() echo $ir $er return $ret } + +bail_on_lldpad() +{ + if systemctl is-active --quiet lldpad; then + + cat >/dev/stderr <<-EOF + WARNING: lldpad is running + + lldpad will likely configure DCB, and this test will + configure Qdiscs. mlxsw does not support both at the + same time, one of them is arbitrarily going to overwrite + the other. That will cause spurious failures (or, + unlikely, passes) of this test. + EOF + + if [[ -z $ALLOW_LLDPAD ]]; then + cat >/dev/stderr <<-EOF + + If you want to run the test anyway, please set + an environment variable ALLOW_LLDPAD to a + non-empty string. 
+ EOF + exit 1 + else + return + fi + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 47315fe48d5a..24dd8ed48580 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -232,7 +232,7 @@ test_mc_aware() stop_traffic local ucth1=${uc_rate[1]} - start_traffic $h1 own bc bc + start_traffic $h1 192.0.2.65 bc bc local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) @@ -254,7 +254,11 @@ test_mc_aware() ret = 100 * ($ucth1 - $ucth2) / $ucth1 if (ret > 0) { ret } else { 0 } ") - check_err $(bc <<< "$deg > 25") + + # Minimum shaper of 200Mbps on MC TCs should cause about 20% of + # degradation on 1Gbps link. + check_err $(bc <<< "$deg < 15") "Minimum shaper not in effect" + check_err $(bc <<< "$deg > 25") "MC traffic degrades UC performance too much" local interval=$((d1 - d0)) local mc_ir=$(rate $u0 $u1 $interval) diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh new file mode 100755 index 000000000000..c9fc4d4885c1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A driver for the ETS selftest that implements testing in offloaded datapath. +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/sch_ets_core.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +ALL_TESTS=" + ping_ipv4 + priomap_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr +" + +switch_create() +{ + ets_switch_create + + # Create a bottleneck so that the DWRR process can kick in. + ethtool -s $h2 speed 1000 autoneg off + ethtool -s $swp2 speed 1000 autoneg off + + # Set the ingress quota high and use the three egress TCs to limit the + # amount of traffic that is admitted to the shared buffers. 
This makes + # sure that there is always enough traffic of all types to select from + # for the DWRR process. + devlink_port_pool_th_set $swp1 0 12 + devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + devlink_port_pool_th_set $swp2 4 12 + devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 + + # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet + # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is + # not offloaded by mlxsw as of this writing, but the mapping used is + # 1:1, which is the mapping currently hard-coded by the driver. +} + +switch_destroy() +{ + devlink_tc_bind_pool_th_restore $swp2 5 egress + devlink_tc_bind_pool_th_restore $swp2 6 egress + devlink_tc_bind_pool_th_restore $swp2 7 egress + devlink_port_pool_th_restore $swp2 4 + devlink_tc_bind_pool_th_restore $swp1 0 ingress + devlink_port_pool_th_restore $swp1 0 + + ethtool -s $swp2 autoneg on + ethtool -s $h2 autoneg on + + ets_switch_destroy +} + +# Callback from sch_ets_tests.sh +get_stats() +{ + local band=$1; shift + + ethtool_stats_get "$h2" rx_octets_prio_$band +} + +bail_on_lldpad +ets_run diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh new file mode 100755 index 000000000000..c6ce0b448bf3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_ets.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh new file mode 100755 index 000000000000..8d245f331619 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + 
+source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_prio.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh new file mode 100755 index 000000000000..013886061f15 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source qos_lib.sh +bail_on_lldpad + +lib_dir=$(dirname $0)/../../../net/forwarding +TCFLAGS=skip_sw +source $lib_dir/sch_tbf_root.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh new file mode 100644 index 000000000000..f7c168decd1e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/mirror_gre_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../mirror_gre_scale.sh + +mirror_gre_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get span_agents) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh new file mode 100755 index 000000000000..7b2acba82a49 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then + echo "SKIP: test is tailored for Mellanox Spectrum-2" + exit 1 +fi + +current_test="" + +cleanup() +{ + pre_cleanup + if [ ! 
-z $current_test ]; then + ${current_test}_cleanup + fi + # Need to reload in order to avoid router abort. + devlink_reload +} + +trap cleanup EXIT + +ALL_TESTS="router tc_flower mirror_gre" +for current_test in ${TESTS:-$ALL_TESTS}; do + source ${current_test}_scale.sh + + num_netifs_var=${current_test^^}_NUM_NETIFS + num_netifs=${!num_netifs_var:-$NUM_NETIFS} + + for should_fail in 0 1; do + RET=0 + target=$(${current_test}_get_target "$should_fail") + ${current_test}_setup_prepare + setup_wait $num_netifs + ${current_test}_test "$target" "$should_fail" + ${current_test}_cleanup + devlink_reload + if [[ "$should_fail" -eq 0 ]]; then + log_test "'$current_test' $target" + else + log_test "'$current_test' overflow $target" + fi + done +done +current_test="" + +exit "$RET" diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh new file mode 100644 index 000000000000..1897e163e3ab --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/router_scale.sh @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../router_scale.sh + +router_get_target() +{ + local should_fail=$1 + local target + + target=$(devlink_resource_size_get kvd) + + if [[ $should_fail -eq 0 ]]; then + target=$((target * 85 / 100)) + else + target=$((target + 1)) + fi + + echo $target +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh new file mode 100644 index 000000000000..a0795227216e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../tc_flower_scale.sh + +tc_flower_get_target() +{ + local should_fail=$1; shift + + # The driver associates a counter with each tc filter, which means the + # number of supported filters is bounded by the number of available + # counters. 
+ # Currently, the driver supports 12K (12,288) flow counters and six of + # these are used for multicast routing. + local target=12282 + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh index 8d2186c7c62b..f7c168decd1e 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh @@ -4,10 +4,13 @@ source ../mirror_gre_scale.sh mirror_gre_get_target() { local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get span_agents) if ((! should_fail)); then - echo 3 + echo $target else - echo 4 + echo $((target + 1)) fi } diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index ae6146ec5afd..4632f51af7ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -112,14 +112,16 @@ sanitization_single_dev_mcast_group_test() RET=0 ip link add dev br0 type bridge mcast_snooping 0 + ip link add name dummy1 up type dummy ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ ttl 20 tos inherit local 198.51.100.1 dstport 4789 \ - dev $swp2 group 239.0.0.1 + dev dummy1 group 239.0.0.1 sanitization_single_dev_test_fail ip link del dev vxlan0 + ip link del dev dummy1 ip link del dev br0 log_test "vxlan device with a multicast group" @@ -181,13 +183,15 @@ sanitization_single_dev_local_interface_test() RET=0 ip link add dev br0 type bridge mcast_snooping 0 + ip link add name dummy1 up type dummy ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \ - ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2 + ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1 sanitization_single_dev_test_fail ip link del dev 
vxlan0 + ip link del dev dummy1 ip link del dev br0 log_test "vxlan device with local interface" diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index 9d8baf5d14b3..025a84c2ab5a 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -3,7 +3,9 @@ lib_dir=$(dirname $0)/../../../net/forwarding -ALL_TESTS="fw_flash_test" +ALL_TESTS="fw_flash_test params_test regions_test reload_test \ + netns_reload_test resource_test dev_info_test \ + empty_reporter_test dummy_reporter_test" NUM_NETIFS=0 source $lib_dir/lib.sh @@ -30,6 +32,417 @@ fw_flash_test() log_test "fw flash test" } +param_get() +{ + local name=$1 + + cmd_jq "devlink dev param show $DL_HANDLE name $name -j" \ + '.[][][].values[] | select(.cmode == "driverinit").value' +} + +param_set() +{ + local name=$1 + local value=$2 + + devlink dev param set $DL_HANDLE name $name cmode driverinit value $value +} + +check_value() +{ + local name=$1 + local phase_name=$2 + local expected_param_value=$3 + local expected_debugfs_value=$4 + local value + + value=$(param_get $name) + check_err $? "Failed to get $name param value" + [ "$value" == "$expected_param_value" ] + check_err $? "Unexpected $phase_name $name param value" + value=$(<$DEBUGFS_DIR/$name) + check_err $? "Failed to get $name debugfs value" + [ "$value" == "$expected_debugfs_value" ] + check_err $? "Unexpected $phase_name $name debugfs value" +} + +params_test() +{ + RET=0 + + local max_macs + local test1 + + check_value max_macs initial 32 32 + check_value test1 initial true Y + + param_set max_macs 16 + check_err $? "Failed to set max_macs param value" + param_set test1 false + check_err $? 
"Failed to set test1 param value" + + check_value max_macs post-set 16 32 + check_value test1 post-set false Y + + devlink dev reload $DL_HANDLE + + check_value max_macs post-reload 16 16 + check_value test1 post-reload false N + + log_test "params test" +} + +check_region_size() +{ + local name=$1 + local size + + size=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].size') + check_err $? "Failed to get $name region size" + [ $size -eq 32768 ] + check_err $? "Invalid $name region size" +} + +check_region_snapshot_count() +{ + local name=$1 + local phase_name=$2 + local expected_count=$3 + local count + + count=$(devlink region show $DL_HANDLE/$name -j | jq -e -r '.[][].snapshot | length') + [ $count -eq $expected_count ] + check_err $? "Unexpected $phase_name snapshot count" +} + +regions_test() +{ + RET=0 + + local count + + check_region_size dummy + check_region_snapshot_count dummy initial 0 + + echo ""> $DEBUGFS_DIR/take_snapshot + check_err $? "Failed to take first dummy region snapshot" + check_region_snapshot_count dummy post-first-snapshot 1 + + echo ""> $DEBUGFS_DIR/take_snapshot + check_err $? "Failed to take second dummy region snapshot" + check_region_snapshot_count dummy post-second-snapshot 2 + + echo ""> $DEBUGFS_DIR/take_snapshot + check_err $? "Failed to take third dummy region snapshot" + check_region_snapshot_count dummy post-third-snapshot 3 + + devlink region del $DL_HANDLE/dummy snapshot 1 + check_err $? "Failed to delete first dummy region snapshot" + + check_region_snapshot_count dummy post-first-delete 2 + + log_test "regions test" +} + +reload_test() +{ + RET=0 + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload" + + echo "y"> $DEBUGFS_DIR/fail_reload + check_err $? "Failed to setup devlink reload to fail" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/fail_reload + check_err $? 
"Failed to setup devlink reload not to fail" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after set not to fail" + + echo "y"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to forbid devlink reload" + + devlink dev reload $DL_HANDLE + check_fail $? "Unexpected success of devlink reload" + + echo "n"> $DEBUGFS_DIR/dont_allow_reload + check_err $? "Failed to re-enable devlink reload" + + devlink dev reload $DL_HANDLE + check_err $? "Failed to reload after re-enable" + + log_test "reload test" +} + +netns_reload_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_err $? "Failed to reload from netns \"testns1\" into netns \"testns2\"" + + ip netns del testns2 + ip netns del testns1 + + log_test "netns reload test" +} + +DUMMYDEV="dummytest" + +res_val_get() +{ + local netns=$1 + local parentname=$2 + local name=$3 + local type=$4 + + cmd_jq "devlink -N $netns resource show $DL_HANDLE -j" \ + ".[][][] | select(.name == \"$parentname\").resources[] \ + | select(.name == \"$name\").$type" +} + +resource_test() +{ + RET=0 + + ip netns add testns1 + check_err $? "Failed add netns \"testns1\"" + ip netns add testns2 + check_err $? "Failed add netns \"testns2\"" + + devlink dev reload $DL_HANDLE netns testns1 + check_err $? "Failed to reload into netns \"testns1\"" + + # Create dummy dev to add the address and routes on. + + ip -n testns1 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns1 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns1 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? 
"Failed add an IP address to dummy device" + + local occ=$(res_val_get testns1 IPv4 fib occ) + local limit=$((occ+1)) + + # Set fib size limit to handle one another route only. + + devlink -N testns1 resource set $DL_HANDLE path IPv4/fib size $limit + check_err $? "Failed to set IPv4/fib resource size" + local size_new=$(res_val_get testns1 IPv4 fib size_new) + [ "$size_new" -eq "$limit" ] + check_err $? "Unexpected \"size_new\" value (got $size_new, expected $limit)" + + devlink -N testns1 dev reload $DL_HANDLE + check_err $? "Failed to reload" + local size=$(res_val_get testns1 IPv4 fib size) + [ "$size" -eq "$limit" ] + check_err $? "Unexpected \"size\" value (got $size, expected $limit)" + + # Insert 2 routes, the first is going to be inserted, + # the second is expected to fail to be inserted. + + ip -n testns1 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + ip -n testns1 r a 192.0.3.0/24 via 192.0.1.2 + check_fail $? "Unexpected successful route add over limit" + + # Now create another dummy in second network namespace and + # insert two routes. That is over the limit of the netdevsim + # instance in the first namespace. Move the netdevsim instance + # into the second namespace and expect it to fail. + + ip -n testns2 link add name $DUMMYDEV type dummy + check_err $? "Failed create dummy device" + ip -n testns2 link set $DUMMYDEV up + check_err $? "Failed bring up dummy device" + ip -n testns2 a a 192.0.1.1/24 dev $DUMMYDEV + check_err $? "Failed add an IP address to dummy device" + ip -n testns2 r a 192.0.2.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + ip -n testns2 r a 192.0.3.0/24 via 192.0.1.2 + check_err $? "Failed to add route" + + devlink -N testns1 dev reload $DL_HANDLE netns testns2 + check_fail $? "Unexpected successful reload from netns \"testns1\" into netns \"testns2\"" + + devlink -N testns2 resource set $DL_HANDLE path IPv4/fib size ' -1' + check_err $? 
"Failed to reset IPv4/fib resource size" + + devlink -N testns2 dev reload $DL_HANDLE netns 1 + check_err $? "Failed to reload devlink back" + + ip netns del testns2 + ip netns del testns1 + + log_test "resource test" +} + +info_get() +{ + local name=$1 + + cmd_jq "devlink dev info $DL_HANDLE -j" ".[][][\"$name\"]" "-e" +} + +dev_info_test() +{ + RET=0 + + driver=$(info_get "driver") + check_err $? "Failed to get driver name" + [ "$driver" == "netdevsim" ] + check_err $? "Unexpected driver name $driver" + + log_test "dev_info test" +} + +empty_reporter_test() +{ + RET=0 + + devlink health show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show empty reporter" + + devlink health dump show $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed show dump of empty reporter" + + devlink health diagnose $DL_HANDLE reporter empty >/dev/null + check_err $? "Failed diagnose empty reporter" + + devlink health recover $DL_HANDLE reporter empty + check_err $? "Failed recover empty reporter" + + log_test "empty reporter test" +} + +check_reporter_info() +{ + local name=$1 + local expected_state=$2 + local expected_error=$3 + local expected_recover=$4 + local expected_grace_period=$5 + local expected_auto_recover=$6 + + local show=$(devlink health show $DL_HANDLE reporter $name -j | jq -e -r ".[][][]") + check_err $? "Failed show $name reporter" + + local state=$(echo $show | jq -r ".state") + [ "$state" == "$expected_state" ] + check_err $? "Unexpected \"state\" value (got $state, expected $expected_state)" + + local error=$(echo $show | jq -r ".error") + [ "$error" == "$expected_error" ] + check_err $? "Unexpected \"error\" value (got $error, expected $expected_error)" + + local recover=`echo $show | jq -r ".recover"` + [ "$recover" == "$expected_recover" ] + check_err $? "Unexpected \"recover\" value (got $recover, expected $expected_recover)" + + local grace_period=$(echo $show | jq -r ".grace_period") + check_err $? 
"Failed get $name reporter grace_period" + [ "$grace_period" == "$expected_grace_period" ] + check_err $? "Unexpected \"grace_period\" value (got $grace_period, expected $expected_grace_period)" + + local auto_recover=$(echo $show | jq -r ".auto_recover") + [ "$auto_recover" == "$expected_auto_recover" ] + check_err $? "Unexpected \"auto_recover\" value (got $auto_recover, expected $expected_auto_recover)" +} + +dummy_reporter_test() +{ + RET=0 + + check_reporter_info dummy healthy 0 0 0 false + + local BREAK_MSG="foo bar" + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy error 1 0 0 false + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" + + local dump_break_msg=$(echo $dump | jq -r ".break_message") + [ "$dump_break_msg" == "$BREAK_MSG" ] + check_err $? "Unexpected dump break message value (got $dump_break_msg, expected $BREAK_MSG)" + + devlink health dump clear $DL_HANDLE reporter dummy + check_err $? "Failed clear dump of dummy reporter" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 1 1 0 false + + devlink health set $DL_HANDLE reporter dummy auto_recover true + check_err $? "Failed to dummy reporter auto_recover option" + + check_reporter_info dummy healthy 1 1 0 true + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_err $? "Failed to break dummy reporter" + + check_reporter_info dummy healthy 2 2 0 true + + local diagnose=$(devlink health diagnose $DL_HANDLE reporter dummy -j -p) + check_err $? "Failed show diagnose of dummy reporter" + + local rcvrd_break_msg=$(echo $diagnose | jq -r ".recovered_break_message") + [ "$rcvrd_break_msg" == "$BREAK_MSG" ] + check_err $? 
"Unexpected recovered break message value (got $rcvrd_break_msg, expected $BREAK_MSG)" + + devlink health set $DL_HANDLE reporter dummy grace_period 10 + check_err $? "Failed to dummy reporter grace_period option" + + check_reporter_info dummy healthy 2 2 10 true + + echo "Y"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to fail" + + echo "$BREAK_MSG"> $DEBUGFS_DIR/health/break_health + check_fail $? "Unexpected success of dummy reporter break" + + check_reporter_info dummy error 3 2 10 true + + devlink health recover $DL_HANDLE reporter dummy + check_fail $? "Unexpected success of dummy reporter recover" + + echo "N"> $DEBUGFS_DIR/health/fail_recover + check_err $? "Failed set dummy reporter recovery to be successful" + + devlink health recover $DL_HANDLE reporter dummy + check_err $? "Failed recover dummy reporter" + + check_reporter_info dummy healthy 3 3 10 true + + echo 8192> $DEBUGFS_DIR/health/binary_len + check_fail $? "Failed set dummy reporter binary len to 8192" + + local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j) + check_err $? "Failed show dump of dummy reporter" + + devlink health dump clear $DL_HANDLE reporter dummy + check_err $? 
"Failed clear dump of dummy reporter" + + log_test "dummy reporter test" +} + setup_prepare() { modprobe netdevsim diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh new file mode 100755 index 000000000000..7effd35369e1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_in_netns.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS="check_devlink_test check_ports_test" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +BUS_ADDR=10 +PORT_COUNT=4 +DEV_NAME=netdevsim$BUS_ADDR +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/ +DL_HANDLE=netdevsim/$DEV_NAME +NETNS_NAME=testns1 + +port_netdev_get() +{ + local port_index=$1 + + cmd_jq "devlink -N $NETNS_NAME port show -j" \ + ".[][\"$DL_HANDLE/$port_index\"].netdev" "-e" +} + +check_ports_test() +{ + RET=0 + + for i in $(seq 0 $(expr $PORT_COUNT - 1)); do + netdev_name=$(port_netdev_get $i) + check_err $? "Failed to get netdev name for port $DL_HANDLE/$i" + ip -n $NETNS_NAME link show $netdev_name &> /dev/null + check_err $? "Failed to find netdev $netdev_name" + done + + log_test "check ports test" +} + +check_devlink_test() +{ + RET=0 + + devlink -N $NETNS_NAME dev show $DL_HANDLE &> /dev/null + check_err $? "Failed to show devlink instance" + + log_test "check devlink test" +} + +setup_prepare() +{ + modprobe netdevsim + ip netns add $NETNS_NAME + ip netns exec $NETNS_NAME \ + echo "$BUS_ADDR $PORT_COUNT" > /sys/bus/netdevsim/new_device + while [ ! 
-d $SYSFS_NET_DIR ] ; do :; done +} + +cleanup() +{ + pre_cleanup + echo "$BUS_ADDR" > /sys/bus/netdevsim/del_device + ip netns del $NETNS_NAME + modprobe -r netdevsim +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh new file mode 100755 index 000000000000..f101ab9441e2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh @@ -0,0 +1,364 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking devlink-trap functionality. It makes use of +# netdevsim which implements the required callbacks. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + init_test + trap_action_test + trap_metadata_test + bad_trap_test + bad_trap_action_test + trap_stats_test + trap_group_action_test + bad_trap_group_test + trap_group_stats_test + port_del_test + dev_del_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SLEEP_TIME=1 +NETDEV="" +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +require_command udevadm + +modprobe netdevsim &> /dev/null +if [ ! -d "$NETDEVSIM_PATH" ]; then + echo "SKIP: No netdevsim support" + exit 1 +fi + +if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then + echo "SKIP: Device netdevsim${DEV_ADDR} already exists" + exit 1 +fi + +init_test() +{ + RET=0 + + test $(devlink_traps_num_get) -ne 0 + check_err $? "No traps were registered" + + log_test "Initialization" +} + +trap_action_test() +{ + local orig_action + local trap_name + local action + + RET=0 + + for trap_name in $(devlink_traps_get); do + # The action of non-drop traps cannot be changed. 
+ if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then + devlink_trap_action_set $trap_name "trap" + action=$(devlink_trap_action_get $trap_name) + if [ $action != "trap" ]; then + check_err 1 "Trap $trap_name did not change action to trap" + fi + + devlink_trap_action_set $trap_name "drop" + action=$(devlink_trap_action_get $trap_name) + if [ $action != "drop" ]; then + check_err 1 "Trap $trap_name did not change action to drop" + fi + else + orig_action=$(devlink_trap_action_get $trap_name) + + devlink_trap_action_set $trap_name "trap" + action=$(devlink_trap_action_get $trap_name) + if [ $action != $orig_action ]; then + check_err 1 "Trap $trap_name changed action when should not" + fi + + devlink_trap_action_set $trap_name "drop" + action=$(devlink_trap_action_get $trap_name) + if [ $action != $orig_action ]; then + check_err 1 "Trap $trap_name changed action when should not" + fi + fi + done + + log_test "Trap action" +} + +trap_metadata_test() +{ + local trap_name + + RET=0 + + for trap_name in $(devlink_traps_get); do + devlink_trap_metadata_test $trap_name "input_port" + check_err $? "Input port not reported as metadata of trap $trap_name" + done + + log_test "Trap metadata" +} + +bad_trap_test() +{ + RET=0 + + devlink_trap_action_set "made_up_trap" "drop" + check_fail $? "Did not get an error for non-existing trap" + + log_test "Non-existing trap" +} + +bad_trap_action_test() +{ + local traps_arr + local trap_name + + RET=0 + + # Pick first trap. + traps_arr=($(devlink_traps_get)) + trap_name=${traps_arr[0]} + + devlink_trap_action_set $trap_name "made_up_action" + check_fail $? "Did not get an error for non-existing trap action" + + log_test "Non-existing trap action" +} + +trap_stats_test() +{ + local trap_name + + RET=0 + + for trap_name in $(devlink_traps_get); do + devlink_trap_stats_idle_test $trap_name + check_err $? 
"Stats of trap $trap_name not idle when netdev down" + + ip link set dev $NETDEV up + + if [ $(devlink_trap_type_get $trap_name) = "drop" ]; then + devlink_trap_action_set $trap_name "trap" + devlink_trap_stats_idle_test $trap_name + check_fail $? "Stats of trap $trap_name idle when action is trap" + + devlink_trap_action_set $trap_name "drop" + devlink_trap_stats_idle_test $trap_name + check_err $? "Stats of trap $trap_name not idle when action is drop" + else + devlink_trap_stats_idle_test $trap_name + check_fail $? "Stats of non-drop trap $trap_name idle when should not" + fi + + ip link set dev $NETDEV down + done + + log_test "Trap statistics" +} + +trap_group_action_test() +{ + local curr_group group_name + local trap_name + local trap_type + local action + + RET=0 + + for group_name in $(devlink_trap_groups_get); do + devlink_trap_group_action_set $group_name "trap" + + for trap_name in $(devlink_traps_get); do + curr_group=$(devlink_trap_group_get $trap_name) + if [ $curr_group != $group_name ]; then + continue + fi + + trap_type=$(devlink_trap_type_get $trap_name) + if [ $trap_type != "drop" ]; then + continue + fi + + action=$(devlink_trap_action_get $trap_name) + if [ $action != "trap" ]; then + check_err 1 "Trap $trap_name did not change action to trap" + fi + done + + devlink_trap_group_action_set $group_name "drop" + + for trap_name in $(devlink_traps_get); do + curr_group=$(devlink_trap_group_get $trap_name) + if [ $curr_group != $group_name ]; then + continue + fi + + trap_type=$(devlink_trap_type_get $trap_name) + if [ $trap_type != "drop" ]; then + continue + fi + + action=$(devlink_trap_action_get $trap_name) + if [ $action != "drop" ]; then + check_err 1 "Trap $trap_name did not change action to drop" + fi + done + done + + log_test "Trap group action" +} + +bad_trap_group_test() +{ + RET=0 + + devlink_trap_group_action_set "made_up_trap_group" "drop" + check_fail $? 
"Did not get an error for non-existing trap group" + + log_test "Non-existing trap group" +} + +trap_group_stats_test() +{ + local group_name + + RET=0 + + for group_name in $(devlink_trap_groups_get); do + devlink_trap_group_stats_idle_test $group_name + check_err $? "Stats of trap group $group_name not idle when netdev down" + + ip link set dev $NETDEV up + + devlink_trap_group_action_set $group_name "trap" + devlink_trap_group_stats_idle_test $group_name + check_fail $? "Stats of trap group $group_name idle when action is trap" + + devlink_trap_group_action_set $group_name "drop" + ip link set dev $NETDEV down + done + + log_test "Trap group statistics" +} + +port_del_test() +{ + local group_name + local i + + # The test never fails. It is meant to exercise different code paths + # and make sure we properly dismantle a port while packets are + # in-flight. + RET=0 + + devlink_traps_enable_all + + for i in $(seq 1 10); do + ip link set dev $NETDEV up + + sleep $SLEEP_TIME + + netdevsim_port_destroy + netdevsim_port_create + udevadm settle + done + + devlink_traps_disable_all + + log_test "Port delete" +} + +dev_del_test() +{ + local group_name + local i + + # The test never fails. It is meant to exercise different code paths + # and make sure we properly unregister traps while packets are + # in-flight. + RET=0 + + devlink_traps_enable_all + + for i in $(seq 1 10); do + ip link set dev $NETDEV up + + sleep $SLEEP_TIME + + cleanup + setup_prepare + done + + devlink_traps_disable_all + + log_test "Device delete" +} + +netdevsim_dev_create() +{ + echo "$DEV_ADDR 0" > ${NETDEVSIM_PATH}/new_device +} + +netdevsim_dev_destroy() +{ + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device +} + +netdevsim_port_create() +{ + echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/new_port +} + +netdevsim_port_destroy() +{ + echo 1 > ${NETDEVSIM_PATH}/devices/${DEV}/del_port +} + +setup_prepare() +{ + local netdev + + netdevsim_dev_create + + if [ ! 
-d "${NETDEVSIM_PATH}/devices/${DEV}" ]; then + echo "Failed to create netdevsim device" + exit 1 + fi + + netdevsim_port_create + + if [ ! -d "${NETDEVSIM_PATH}/devices/${DEV}/net/" ]; then + echo "Failed to create netdevsim port" + exit 1 + fi + + # Wait for udev to rename newly created netdev. + udevadm settle + + NETDEV=$(ls ${NETDEVSIM_PATH}/devices/${DEV}/net/) +} + +cleanup() +{ + pre_cleanup + netdevsim_port_destroy + netdevsim_dev_destroy +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh new file mode 100755 index 000000000000..2f87c3be76a9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -0,0 +1,341 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the FIB offload API. It makes use of netdevsim +# which registers a listener to the FIB notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + ipv4_identical_routes + ipv4_tos + ipv4_metric + ipv4_replace + ipv4_delete + ipv4_plen + ipv4_replay + ipv4_flush + ipv4_error_path + ipv6_add + ipv6_metric + ipv6_append_single + ipv6_replace_single + ipv6_metric_multipath + ipv6_append_multipath + ipv6_replace_multipath + ipv6_append_multipath_to_single + ipv6_delete_single + ipv6_delete_multipath + ipv6_replay_single + ipv6_replay_multipath + ipv6_error_path +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source $lib_dir/fib_offload_lib.sh + +ipv4_identical_routes() +{ + fib_ipv4_identical_routes_test "testns1" +} + +ipv4_tos() +{ + fib_ipv4_tos_test "testns1" +} + +ipv4_metric() +{ + fib_ipv4_metric_test "testns1" +} + +ipv4_replace() +{ + fib_ipv4_replace_test "testns1" +} + +ipv4_delete() +{ + 
fib_ipv4_delete_test "testns1" +} + +ipv4_plen() +{ + fib_ipv4_plen_test "testns1" +} + +ipv4_replay_metric() +{ + fib_ipv4_replay_metric_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_tos() +{ + fib_ipv4_replay_tos_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay_plen() +{ + fib_ipv4_replay_plen_test "testns1" "$DEVLINK_DEV" +} + +ipv4_replay() +{ + ipv4_replay_metric + ipv4_replay_tos + ipv4_replay_plen +} + +ipv4_flush() +{ + fib_ipv4_flush_test "testns1" +} + +ipv4_error_path_add() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 \ + &> /dev/null + done + + log_test "IPv4 error path - add" + + ip -n testns1 link del dev dummy1 +} + +ipv4_error_path_replay() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 192.0.2.${lsb}/32 dev dummy1 + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null + + log_test "IPv4 error path - replay" + + ip -n testns1 link del dev dummy1 + + # Successfully reload after deleting all the routes. + devlink -N testns1 resource set $DEVLINK_DEV path IPv4/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV +} + +ipv4_error_path() +{ + # Test the different error paths of the notifiers by limiting the size + # of the "IPv4/fib" resource. 
+ ipv4_error_path_add + ipv4_error_path_replay +} + +ipv6_add() +{ + fib_ipv6_add_test "testns1" +} + +ipv6_metric() +{ + fib_ipv6_metric_test "testns1" +} + +ipv6_append_single() +{ + fib_ipv6_append_single_test "testns1" +} + +ipv6_replace_single() +{ + fib_ipv6_replace_single_test "testns1" +} + +ipv6_metric_multipath() +{ + fib_ipv6_metric_multipath_test "testns1" +} + +ipv6_append_multipath() +{ + fib_ipv6_append_multipath_test "testns1" +} + +ipv6_replace_multipath() +{ + fib_ipv6_replace_multipath_test "testns1" +} + +ipv6_append_multipath_to_single() +{ + fib_ipv6_append_multipath_to_single_test "testns1" +} + +ipv6_delete_single() +{ + fib_ipv6_delete_single_test "testns1" +} + +ipv6_delete_multipath() +{ + fib_ipv6_delete_multipath_test "testns1" +} + +ipv6_replay_single() +{ + fib_ipv6_replay_single_test "testns1" "$DEVLINK_DEV" +} + +ipv6_replay_multipath() +{ + fib_ipv6_replay_multipath_test "testns1" "$DEVLINK_DEV" +} + +ipv6_error_path_add_single() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 \ + &> /dev/null + done + + log_test "IPv6 error path - add single" + + ip -n testns1 link del dev dummy1 +} + +ipv6_error_path_add_multipath() +{ + local lsb + + RET=0 + + for i in $(seq 1 2); do + ip -n testns1 link add name dummy$i type dummy + ip -n testns1 link set dev dummy$i up + ip -n testns1 address add 2001:db8:$i::1/64 dev dummy$i + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:10::${lsb}/128 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 &> /dev/null + done + + log_test "IPv6 error path - add multipath" + + 
for i in $(seq 1 2); do + ip -n testns1 link del dev dummy$i + done +} + +ipv6_error_path_replay() +{ + local lsb + + RET=0 + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV + + for lsb in $(seq 1 20); do + ip -n testns1 route add 2001:db8:1::${lsb}/128 dev dummy1 + done + + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 10 + devlink -N testns1 dev reload $DEVLINK_DEV &> /dev/null + + log_test "IPv6 error path - replay" + + ip -n testns1 link del dev dummy1 + + # Successfully reload after deleting all the routes. + devlink -N testns1 resource set $DEVLINK_DEV path IPv6/fib size 100 + devlink -N testns1 dev reload $DEVLINK_DEV +} + +ipv6_error_path() +{ + # Test the different error paths of the notifiers by limiting the size + # of the "IPv6/fib" resource. + ipv6_error_path_add_single + ipv6_error_path_add_multipath + ipv6_error_path_replay +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + ip netns add testns1 + if [ $? -ne 0 ]; then + echo "Failed to add netns \"testns1\"" + exit 1 + fi + + devlink dev reload $DEVLINK_DEV netns testns1 + if [ $? 
-ne 0 ]; then + echo "Failed to reload into netns \"testns1\"" + exit 1 + fi +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore new file mode 100644 index 000000000000..9ae8db44ec14 --- /dev/null +++ b/tools/testing/selftests/filesystems/epoll/.gitignore @@ -0,0 +1 @@ +epoll_wakeup_test diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile new file mode 100644 index 000000000000..78ae4aaf7141 --- /dev/null +++ b/tools/testing/selftests/filesystems/epoll/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +LDLIBS += -lpthread +TEST_GEN_PROGS := epoll_wakeup_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c new file mode 100644 index 000000000000..37a04dab56f0 --- /dev/null +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -0,0 +1,3074 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <poll.h> +#include <unistd.h> +#include <signal.h> +#include <pthread.h> +#include <sys/epoll.h> +#include <sys/socket.h> +#include "../../kselftest_harness.h" + +struct epoll_mtcontext +{ + int efd[3]; + int sfd[4]; + int count; + + pthread_t main; + pthread_t waiter; +}; + +static void signal_handler(int signum) +{ +} + +static void kill_timeout(struct epoll_mtcontext *ctx) +{ + usleep(1000000); + pthread_kill(ctx->main, SIGUSR1); + pthread_kill(ctx->waiter, SIGUSR1); +} + +static void *waiter_entry1a(void *data) +{ + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0) + 
__sync_fetch_and_add(&ctx->count, 1); + + return NULL; +} + +static void *waiter_entry1ap(void *data) +{ + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + pfd.fd = ctx->efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx->count, 1); + } + + return NULL; +} + +static void *waiter_entry1o(void *data) +{ + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + if (epoll_wait(ctx->efd[0], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx->count, 1); + + return NULL; +} + +static void *waiter_entry1op(void *data) +{ + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext *ctx = data; + + pfd.fd = ctx->efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx->efd[0], &e, 1, 0) > 0) + __sync_fetch_and_or(&ctx->count, 1); + } + + return NULL; +} + +static void *waiter_entry2a(void *data) +{ + struct epoll_event events[2]; + struct epoll_mtcontext *ctx = data; + + if (epoll_wait(ctx->efd[0], events, 2, -1) > 0) + __sync_fetch_and_add(&ctx->count, 1); + + return NULL; +} + +static void *waiter_entry2ap(void *data) +{ + struct pollfd pfd; + struct epoll_event events[2]; + struct epoll_mtcontext *ctx = data; + + pfd.fd = ctx->efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx->efd[0], events, 2, 0) > 0) + __sync_fetch_and_add(&ctx->count, 1); + } + + return NULL; +} + +static void *emitter_entry1(void *data) +{ + struct epoll_mtcontext *ctx = data; + + usleep(100000); + write(ctx->sfd[1], "w", 1); + + kill_timeout(ctx); + + return NULL; +} + +static void *emitter_entry2(void *data) +{ + struct epoll_mtcontext *ctx = data; + + usleep(100000); + write(ctx->sfd[1], "w", 1); + write(ctx->sfd[3], "w", 1); + + kill_timeout(ctx); + + return NULL; +} + +/* + * t0 + * | (ew) + * e0 + * | (lt) + * s0 + */ +TEST(epoll1) +{ + int efd; + int sfd[2]; + struct epoll_event e; + + 
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (et) + * s0 + */ +TEST(epoll2) +{ + int efd; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, &e, 1, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ +TEST(epoll3) +{ + int efd; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (ew) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll4) +{ + int efd; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN | EPOLLET; + 
ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * | (lt) + * s0 + */ +TEST(epoll5) +{ + int efd; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 1); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 1); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (et) + * s0 + */ +TEST(epoll6) +{ + int efd; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 1); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + ASSERT_EQ(poll(&pfd, 1, 0), 0); + ASSERT_EQ(epoll_wait(efd, &e, 1, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ + +TEST(epoll7) +{ + int efd; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + 
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll8) +{ + int efd; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd = epoll_create(1); + ASSERT_GE(efd, 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 2); + + pfd.fd = efd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd, events, 2, 0), 0); + + close(efd); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * s0 + */ +TEST(epoll9) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + 
e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (et) + * s0 + */ +TEST(epoll10) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ +TEST(epoll11) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + 
events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 2, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll12) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* 
+ * t0 t1 + * (ew) \ / (p) + * e0 + * | (lt) + * s0 + */ +TEST(epoll13) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (et) + * s0 + */ +TEST(epoll14) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (lt) / \ (lt) + * s0 s2 + */ +TEST(epoll15) +{ + pthread_t 
emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry2ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 2, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (et) / \ (et) + * s0 s2 + */ +TEST(epoll16) +{ + pthread_t emitter; + struct epoll_event events[2]; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[2], events), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], events, 1, -1) > 0) + 
__sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 + * | (ew) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll17) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll18) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll19) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] 
= epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll20) +{ + int efd[2]; + int sfd[2]; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll21) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + 
close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll22) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll23) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 + * | (p) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll24) +{ + int efd[2]; + int sfd[2]; + struct pollfd pfd; + struct epoll_event e; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0); + + efd[0] 
= epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], &e), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + EXPECT_EQ(epoll_wait(efd[0], &e, 1, 0), 0); + + close(efd[0]); + close(efd[1]); + close(sfd[0]); + close(sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll25) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll26) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, 
signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll27) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, 
NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll28) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll29) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, 
NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll30) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll31) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, 
ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * | (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll32) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 1); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll33) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, 
signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll34) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + 
pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll35) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll36) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); 
+ ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll37) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll38) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 
0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_or(&ctx.count, 2); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll39) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, 
NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (ew) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll40) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1o, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_or(&ctx.count, 2); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll41) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + 
ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll42) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll43) +{ + 
pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (ew) | | (p) + * | e0 + * \ / (et) + * e1 + * | (et) + * s0 + */ +TEST(epoll44) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + 
EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (lt) + * s0 + */ +TEST(epoll45) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (lt) + * e1 + * | (et) + * s0 + */ +TEST(epoll46) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, 
ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (et) + * e1 + * | (lt) + * s0 + */ +TEST(epoll47) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + pfd.fd = ctx.efd[1]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[1], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 t1 + * (p) | | (p) + * | e0 + * \ / (et) + * e1 
+ * | (et) + * s0 + */ +TEST(epoll48) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1op, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry1, &ctx), 0); + + if (epoll_wait(ctx.efd[1], &e, 1, -1) > 0) + __sync_fetch_and_or(&ctx.count, 2); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_TRUE((ctx.count == 2) || (ctx.count == 3)); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + +/* + * t0 + * | (ew) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll49) +{ + int efd[3]; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + 
ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (ew) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll50) +{ + int efd[3]; + int sfd[4]; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll51) +{ + int efd[3]; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + 
ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 + * | (p) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll52) +{ + int efd[3]; + int sfd[4]; + struct pollfd pfd; + struct epoll_event events[2]; + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &sfd[2]), 0); + + efd[0] = epoll_create(1); + ASSERT_GE(efd[0], 0); + + efd[1] = epoll_create(1); + ASSERT_GE(efd[1], 0); + + efd[2] = epoll_create(1); + ASSERT_GE(efd[2], 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[1], EPOLL_CTL_ADD, sfd[0], events), 0); + + events[0].events = EPOLLIN; + ASSERT_EQ(epoll_ctl(efd[2], EPOLL_CTL_ADD, sfd[2], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[1], events), 0); + + events[0].events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(efd[0], EPOLL_CTL_ADD, efd[2], events), 0); + + ASSERT_EQ(write(sfd[1], "w", 1), 1); + ASSERT_EQ(write(sfd[3], "w", 1), 1); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 1); + EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 2); + + pfd.fd = efd[0]; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + 
EXPECT_EQ(epoll_wait(efd[0], events, 2, 0), 0); + + close(efd[0]); + close(efd[1]); + close(efd[2]); + close(sfd[0]); + close(sfd[1]); + close(sfd[2]); + close(sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll53) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll54) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, 
SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll55) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], 
EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, -1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (ew) \ / (p) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll56) +{ + pthread_t emitter; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + if (epoll_wait(ctx.efd[0], &e, 1, 
-1) > 0) + __sync_fetch_and_add(&ctx.count, 1); + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (p) \ / (p) + * e0 + * (lt) / \ (lt) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll57) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + pfd.fd = ctx.efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + 
close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +/* + * t0 t1 + * (p) \ / (p) + * e0 + * (et) / \ (et) + * e1 e2 + * (lt) | | (lt) + * s0 s2 + */ +TEST(epoll58) +{ + pthread_t emitter; + struct pollfd pfd; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[0]), 0); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx.sfd[2]), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + ctx.efd[1] = epoll_create(1); + ASSERT_GE(ctx.efd[1], 0); + + ctx.efd[2] = epoll_create(1); + ASSERT_GE(ctx.efd[2], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[1], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[2], EPOLL_CTL_ADD, ctx.sfd[2], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[1], &e), 0); + + e.events = EPOLLIN | EPOLLET; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.efd[2], &e), 0); + + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&ctx.waiter, NULL, waiter_entry1ap, &ctx), 0); + ASSERT_EQ(pthread_create(&emitter, NULL, emitter_entry2, &ctx), 0); + + pfd.fd = ctx.efd[0]; + pfd.events = POLLIN; + if (poll(&pfd, 1, -1) > 0) { + if (epoll_wait(ctx.efd[0], &e, 1, 0) > 0) + __sync_fetch_and_add(&ctx.count, 1); + } + + ASSERT_EQ(pthread_join(ctx.waiter, NULL), 0); + EXPECT_EQ(ctx.count, 2); + + if (pthread_tryjoin_np(emitter, NULL) < 0) { + pthread_kill(emitter, SIGUSR1); + pthread_join(emitter, NULL); + } + + close(ctx.efd[0]); + close(ctx.efd[1]); + close(ctx.efd[2]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); + close(ctx.sfd[2]); + close(ctx.sfd[3]); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index f901076aa2ea..56894477c8bd 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ 
-116,6 +116,16 @@ config_set_name() echo -n $1 > $DIR/config_name } +config_set_into_buf() +{ + echo 1 > $DIR/config_into_buf +} + +config_unset_into_buf() +{ + echo 0 > $DIR/config_into_buf +} + config_set_sync_direct() { echo 1 > $DIR/config_sync_direct @@ -153,11 +163,14 @@ config_set_read_fw_idx() read_firmwares() { - if [ "$1" = "xzonly" ]; then - fwfile="${FW}-orig" + if [ "$(cat $DIR/config_into_buf)" == "1" ]; then + fwfile="$FW_INTO_BUF" else fwfile="$FW" fi + if [ "$1" = "xzonly" ]; then + fwfile="${fwfile}-orig" + fi for i in $(seq 0 3); do config_set_read_fw_idx $i # Verify the contents are what we expect. @@ -194,6 +207,18 @@ test_batched_request_firmware_nofile() echo "OK" } +test_batched_request_firmware_into_buf_nofile() +{ + echo -n "Batched request_firmware_into_buf() nofile try #$1: " + config_reset + config_set_name nope-test-firmware.bin + config_set_into_buf + config_trigger_sync + read_firmwares_expect_nofile + release_all_firmware + echo "OK" +} + test_batched_request_firmware_direct_nofile() { echo -n "Batched request_firmware_direct() nofile try #$1: " @@ -259,6 +284,18 @@ test_batched_request_firmware() echo "OK" } +test_batched_request_firmware_into_buf() +{ + echo -n "Batched request_firmware_into_buf() $2 try #$1: " + config_reset + config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME + config_set_into_buf + config_trigger_sync + read_firmwares $2 + release_all_firmware + echo "OK" +} + test_batched_request_firmware_direct() { echo -n "Batched request_firmware_direct() $2 try #$1: " @@ -308,6 +345,10 @@ for i in $(seq 1 5); do done for i in $(seq 1 5); do + test_batched_request_firmware_into_buf $i normal +done + +for i in $(seq 1 5); do test_batched_request_firmware_direct $i normal done @@ -328,6 +369,10 @@ for i in $(seq 1 5); do done for i in $(seq 1 5); do + test_batched_request_firmware_into_buf_nofile $i +done + +for i in $(seq 1 5); do test_batched_request_firmware_direct_nofile $i done @@ -351,6 +396,10 @@ for i in $(seq 1 5); do 
done for i in $(seq 1 5); do + test_batched_request_firmware_into_buf $i both +done + +for i in $(seq 1 5); do test_batched_request_firmware_direct $i both done @@ -371,6 +420,10 @@ for i in $(seq 1 5); do done for i in $(seq 1 5); do + test_batched_request_firmware_into_buf $i xzonly +done + +for i in $(seq 1 5); do test_batched_request_firmware_direct $i xzonly done diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index f236cc295450..5b8c0fedee76 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -9,6 +9,12 @@ DIR=/sys/devices/virtual/misc/test_firmware PROC_CONFIG="/proc/config.gz" TEST_DIR=$(dirname $0) +# We need to load a different file to test request_firmware_into_buf +# I believe the issue is firmware loaded cached vs. non-cached +# with same filename is bungled. +# To reproduce rename this to test-firmware.bin +TEST_FIRMWARE_INTO_BUF_FILENAME=test-firmware-into-buf.bin + # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 @@ -28,6 +34,12 @@ test_modprobe() check_mods() { + local uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "skip all tests: must be run as root" >&2 + exit $ksft_skip + fi + trap "test_modprobe" EXIT if [ ! 
-d $DIR ]; then modprobe test_firmware @@ -108,6 +120,8 @@ setup_tmp_file() FWPATH=$(mktemp -d) FW="$FWPATH/test-firmware.bin" echo "ABCD0123" >"$FW" + FW_INTO_BUF="$FWPATH/$TEST_FIRMWARE_INTO_BUF_FILENAME" + echo "EFGH4567" >"$FW_INTO_BUF" NAME=$(basename "$FW") if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path @@ -175,6 +189,9 @@ test_finish() if [ -f $FW ]; then rm -f "$FW" fi + if [ -f $FW_INTO_BUF ]; then + rm -f "$FW_INTO_BUF" + fi if [ -d $FWPATH ]; then rm -rf "$FWPATH" fi diff --git a/tools/testing/selftests/ftrace/settings b/tools/testing/selftests/ftrace/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/ftrace/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc new file mode 100644 index 000000000000..d75a8695bc21 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/direct/ftrace-direct.tc @@ -0,0 +1,69 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test ftrace direct functions against tracers + +rmmod ftrace-direct ||: +if ! 
modprobe ftrace-direct ; then + echo "No ftrace-direct sample module - please make CONFIG_SAMPLE_FTRACE_DIRECT=m" + exit_unresolved; +fi + +echo "Let the module run a little" +sleep 1 + +grep -q "my_direct_func: waking up" trace + +rmmod ftrace-direct + +test_tracer() { + tracer=$1 + + # tracer -> direct -> no direct > no tracer + echo $tracer > current_tracer + modprobe ftrace-direct + rmmod ftrace-direct + echo nop > current_tracer + + # tracer -> direct -> no tracer > no direct + echo $tracer > current_tracer + modprobe ftrace-direct + echo nop > current_tracer + rmmod ftrace-direct + + # direct -> tracer -> no tracer > no direct + modprobe ftrace-direct + echo $tracer > current_tracer + echo nop > current_tracer + rmmod ftrace-direct + + # direct -> tracer -> no direct > no notracer + modprobe ftrace-direct + echo $tracer > current_tracer + rmmod ftrace-direct + echo nop > current_tracer +} + +for t in `cat available_tracers`; do + if [ "$t" != "nop" ]; then + test_tracer $t + fi +done + +echo nop > current_tracer +rmmod ftrace-direct ||: + +# Now do the same thing with another direct function registered +echo "Running with another ftrace direct function" + +rmmod ftrace-direct-too ||: +modprobe ftrace-direct-too + +for t in `cat available_tracers`; do + if [ "$t" != "nop" ]; then + test_tracer $t + fi +done + +echo nop > current_tracer +rmmod ftrace-direct ||: +rmmod ftrace-direct-too ||: diff --git a/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc new file mode 100644 index 000000000000..801ecb63e84c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/direct/kprobe-direct.tc @@ -0,0 +1,84 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test ftrace direct functions against kprobes + +rmmod ftrace-direct ||: +if ! 
modprobe ftrace-direct ; then + echo "No ftrace-direct sample module - please build with CONFIG_SAMPLE_FTRACE_DIRECT=m" + exit_unresolved; +fi + +if [ ! -f kprobe_events ]; then + echo "No kprobe_events file -please build CONFIG_KPROBE_EVENTS" + exit_unresolved; +fi + +echo "Let the module run a little" +sleep 1 + +grep -q "my_direct_func: waking up" trace + +rmmod ftrace-direct + +echo 'p:kwake wake_up_process task=$arg1' > kprobe_events + +start_direct() { + echo > trace + modprobe ftrace-direct + sleep 1 + grep -q "my_direct_func: waking up" trace +} + +stop_direct() { + rmmod ftrace-direct +} + +enable_probe() { + echo > trace + echo 1 > events/kprobes/kwake/enable + sleep 1 + grep -q "kwake:" trace +} + +disable_probe() { + echo 0 > events/kprobes/kwake/enable +} + +test_kprobes() { + # probe -> direct -> no direct > no probe + enable_probe + start_direct + stop_direct + disable_probe + + # probe -> direct -> no probe > no direct + enable_probe + start_direct + disable_probe + stop_direct + + # direct -> probe -> no probe > no direct + start_direct + enable_probe + disable_probe + stop_direct + + # direct -> probe -> no direct > no noprobe + start_direct + enable_probe + stop_direct + disable_probe +} + +test_kprobes + +# Now do this with a second registered direct function +echo "Running with another ftrace direct function" + +modprobe ftrace-direct-too + +test_kprobes + +rmmod ftrace-direct-too + +echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 27a54a17da65..f4e92afab14b 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -30,7 +30,7 @@ ftrace_filter_check '*schedule*' '^.*schedule.*$' ftrace_filter_check 'schedule*' '^schedule.*$' # filter by *mid*end -ftrace_filter_check '*aw*lock' '.*aw.*lock$' +ftrace_filter_check '*pin*lock' 
'.*pin.*lock$' # filter by start*mid* ftrace_filter_check 'mutex*try*' '^mutex.*try.*' diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 36fb59f886ea..1a52f2883fe0 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -3,6 +3,8 @@ # description: ftrace - stacktrace filter command # flags: instance +[ ! -f set_ftrace_filter ] && exit_unsupported + echo _do_fork:stacktrace >> set_ftrace_filter grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc index 86a1f07ef2ca..71fa3f49e35e 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc @@ -15,6 +15,11 @@ if [ $NP -eq 1 ] ;then exit_unresolved fi +if ! grep -q "function" available_tracers ; then + echo "Function trace is not enabled" + exit_unsupported +fi + ORIG_CPUMASK=`cat tracing_cpumask` do_reset() { diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 1d96c5f7e402..5d4550591ff9 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -46,6 +46,9 @@ reset_events_filter() { # reset all current setting filters } reset_ftrace_filter() { # reset all triggers in set_ftrace_filter + if [ ! 
-f set_ftrace_filter ]; then + return 0 + fi echo > set_ftrace_filter grep -v '^#' set_ftrace_filter | while read t; do tr=`echo $t | cut -d: -f2` @@ -93,7 +96,7 @@ initialize_ftrace() { # Reset ftrace to initial-state disable_events [ -f set_event_pid ] && echo > set_event_pid [ -f set_ftrace_pid ] && echo > set_ftrace_pid - [ -f set_ftrace_filter ] && echo | tee set_ftrace_* + [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace [ -f set_graph_function ] && echo | tee set_graph_* [ -f stack_trace_filter ] && echo > stack_trace_filter [ -f kprobe_events ] && echo > kprobe_events @@ -115,7 +118,7 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file command=$(echo "$2" | tr -d ^) echo "Test command: $command" echo > error_log - (! echo "$command" > "$3" ) 2> /dev/null + (! echo "$command" >> "$3" ) 2> /dev/null grep "$1: error:" -A 3 error_log N=$(tail -n 1 error_log | wc -c) # " Command: " and "^\n" => 13 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc index 3fb70e01b1fe..3ff236719b6e 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc @@ -24,7 +24,21 @@ test -d events/kprobes2/event2 || exit_failure :;: "Add an event on dot function without name" ;: -FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "` +find_dot_func() { + if [ ! 
-f available_filter_functions ]; then + grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " " + return; + fi + + grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do + if grep -s $f available_filter_functions; then + echo $f + break + fi + done +} + +FUNC=`find_dot_func | tail -n 1` [ "x" != "x$FUNC" ] || exit_unresolved echo "p $FUNC" > kprobe_events EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:` diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc new file mode 100644 index 000000000000..44494bac86d1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -0,0 +1,35 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Create/delete multiprobe on kprobe event + +[ -f kprobe_events ] || exit_unsupported + +grep -q "Create/append/" README || exit_unsupported + +# Choose 2 symbols for target +SYM1=_do_fork +SYM2=do_exit +EVENT_NAME=kprobes/testevent + +DEF1="p:$EVENT_NAME $SYM1" +DEF2="p:$EVENT_NAME $SYM2" + +:;: "Define an event which has 2 probes" ;: +echo $DEF1 >> kprobe_events +echo $DEF2 >> kprobe_events +cat kprobe_events | grep "$DEF1" +cat kprobe_events | grep "$DEF2" + +:;: "Remove the event by name (should remove both)" ;: +echo "-:$EVENT_NAME" >> kprobe_events +test `cat kprobe_events | wc -l` -eq 0 + +:;: "Remove just 1 event" ;: +echo $DEF1 >> kprobe_events +echo $DEF2 >> kprobe_events +echo "-:$EVENT_NAME $SYM1" >> kprobe_events +! cat kprobe_events | grep "$DEF1" +cat kprobe_events | grep "$DEF2" + +:;: "Appending different type must fail" ;: +! 
echo "$DEF1 \$stack" >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 29faaec942c6..ef1e9bafb098 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -41,6 +41,11 @@ check_error 'p vfs_read ^%none_reg' # BAD_REG_NAME check_error 'p vfs_read ^@12345678abcde' # BAD_MEM_ADDR check_error 'p vfs_read ^@+10' # FILE_ON_KPROBE +grep -q "imm-value" README && \ +check_error 'p vfs_read arg1=\^x' # BAD_IMM +grep -q "imm-string" README && \ +check_error 'p vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE + check_error 'p vfs_read ^+0@0)' # DEREF_NEED_BRACE check_error 'p vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS check_error 'p vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE @@ -82,4 +87,15 @@ case $(uname -m) in ;; esac +# multiprobe errors +if grep -q "Create/append/" README && grep -q "imm-value" README; then +echo 'p:kprobes/testevent _do_fork' > kprobe_events +check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE +echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events +check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE +check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE +fi + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index 5862eee91e1d..6e3dbe5f96b7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -20,9 +20,9 @@ while read i; do test $N -eq 256 && break done -L=`wc -l kprobe_events` -if [ $L -ne $N ]; then - echo "The number of kprobes events ($L) is not $N" +L=`cat 
kprobe_events | wc -l` +if [ $L -ne 256 ]; then + echo "The number of kprobes events ($L) is not 256" exit_fail fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc index 1221240f8cf6..3f2aee115f6e 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc @@ -21,10 +21,10 @@ grep -q "snapshot()" README || exit_unsupported # version issue echo "Test expected snapshot action failure" -echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger && exit_fail +echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> events/sched/sched_waking/trigger && exit_fail echo "Test expected save action failure" -echo 'hist:keys=comm:onmatch(sched.sched_wakeup).save(comm,prio)' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger && exit_fail +echo 'hist:keys=comm:onmatch(sched.sched_wakeup).save(comm,prio)' >> events/sched/sched_waking/trigger && exit_fail exit_xfail diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc index 064a284e4e75..c80007aa9f86 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc @@ -16,7 +16,7 @@ grep -q "onchange(var)" README || exit_unsupported # version issue echo "Test onchange action" -echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger +echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' 
>> events/sched/sched_waking/trigger ping $LOCALHOST -c 3 nice -n 1 ping $LOCALHOST -c 3 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc index 18fff69fc433..f546c1b66a9b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc @@ -23,9 +23,9 @@ grep -q "snapshot()" README || exit_unsupported # version issue echo "Test snapshot action" -echo 1 > /sys/kernel/debug/tracing/events/sched/enable +echo 1 > events/sched/enable -echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger +echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger ping $LOCALHOST -c 3 nice -n 1 ping $LOCALHOST -c 3 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc new file mode 100644 index 000000000000..d44087a2f3d1 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-syntax-errors.tc @@ -0,0 +1,32 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test histogram parser errors + +if [ ! -f set_event -o ! -d events/kmem ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f events/kmem/kmalloc/trigger ]; then + echo "event trigger is not supported" + exit_unsupported +fi + +if [ ! 
-f events/kmem/kmalloc/hist ]; then + echo "hist trigger is not supported" + exit_unsupported +fi + +[ -f error_log ] || exit_unsupported + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'hist:kmem:kmalloc' "$1" 'events/kmem/kmalloc/trigger' +} + +check_error 'hist:keys=common_pid:vals=bytes_req:sort=common_pid,^junk' # INVALID_SORT_FIELD +check_error 'hist:keys=common_pid:vals=bytes_req:^sort=' # EMPTY_ASSIGNMENT +check_error 'hist:keys=common_pid:vals=bytes_req:^sort=common_pid,' # EMPTY_SORT_FIELD +check_error 'hist:keys=common_pid:vals=bytes_req:sort=common_pid.^junk' # INVALID_SORT_MODIFIER +check_error 'hist:keys=common_pid:vals=bytes_req,bytes_alloc:^sort=common_pid,bytes_req,bytes_alloc' # TOO_MANY_SORT_FIELDS + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc index 7717c0a09686..ac738500d17f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc @@ -28,7 +28,7 @@ if [ -z "$FEATURE" ]; then exit_unsupported fi -echo "Test snapshot tigger" +echo "Test snapshot trigger" echo 0 > snapshot echo 1 > events/sched/sched_process_fork/enable ( echo "forked") diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh index a27e2eec3586..8b2b6088540d 100755 --- a/tools/testing/selftests/gen_kselftest_tar.sh +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -38,16 +38,21 @@ main() esac fi - install_dir=./kselftest + # Create working directory. + dest=`pwd` + install_work="$dest"/kselftest_install + install_name=kselftest + install_dir="$install_work"/"$install_name" + mkdir -p "$install_dir" -# Run install using INSTALL_KSFT_PATH override to generate install -# directory -./kselftest_install.sh -tar $copts kselftest${ext} $install_dir -echo "Kselftest archive kselftest${ext} created!" 
+ # Run install using INSTALL_KSFT_PATH override to generate install + # directory + ./kselftest_install.sh "$install_dir" + (cd "$install_work"; tar $copts "$dest"/kselftest${ext} $install_name) + echo "Kselftest archive kselftest${ext} created!" -# clean up install directory -rm -rf kselftest + # clean up top-level install work directory + rm -rf "$install_work" } main "$@" diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c index e700e09e3682..af7f9c7d59bc 100644 --- a/tools/testing/selftests/ir/ir_loopback.c +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -54,9 +54,9 @@ static const struct { { RC_PROTO_RC6_MCE, "rc-6-mce", 0x00007fff, "rc-6" }, { RC_PROTO_SHARP, "sharp", 0x1fff, "sharp" }, { RC_PROTO_IMON, "imon", 0x7fffffff, "imon" }, - { RC_PROTO_RCMM12, "rcmm-12", 0x00000fff, "rcmm" }, - { RC_PROTO_RCMM24, "rcmm-24", 0x00ffffff, "rcmm" }, - { RC_PROTO_RCMM32, "rcmm-32", 0xffffffff, "rcmm" }, + { RC_PROTO_RCMM12, "rcmm-12", 0x00000fff, "rc-mm" }, + { RC_PROTO_RCMM24, "rcmm-24", 0x00ffffff, "rc-mm" }, + { RC_PROTO_RCMM32, "rcmm-32", 0xffffffff, "rc-mm" }, }; int lirc_open(const char *rc) diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh index fa7c24e8eefb..2ff600388c30 100755 --- a/tools/testing/selftests/kexec/test_kexec_file_load.sh +++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh @@ -37,11 +37,20 @@ is_ima_sig_required() # sequentially. As a result, a policy rule may be defined, but # might not necessarily be used. This test assumes if a policy # rule is specified, that is the intent. + + # First check for appended signature (modsig), then xattr if [ $ima_read_policy -eq 1 ]; then check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \ - "appraise_type=imasig" + "appraise_type=imasig|modsig" ret=$? 
- [ $ret -eq 1 ] && log_info "IMA signature required"; + if [ $ret -eq 1 ]; then + log_info "IMA or appended(modsig) signature required" + else + check_ima_policy "appraise" "func=KEXEC_KERNEL_CHECK" \ + "appraise_type=imasig" + ret=$? + [ $ret -eq 1 ] && log_info "IMA signature required"; + fi fi return $ret } @@ -84,6 +93,22 @@ check_for_imasig() return $ret } +# Return 1 for appended signature (modsig) found and 0 for not found. +check_for_modsig() +{ + local module_sig_string="~Module signature appended~" + local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)" + local ret=0 + + if [ "$sig" == "$module_sig_string" ]; then + ret=1 + log_info "kexec kernel image modsig signed" + else + log_info "kexec kernel image not modsig signed" + fi + return $ret +} + kexec_file_load_test() { local succeed_msg="kexec_file_load succeeded" @@ -98,7 +123,8 @@ kexec_file_load_test() # In secureboot mode with an architecture specific # policy, make sure either an IMA or PE signature exists. if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ] && \ - [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ]; then + [ $ima_signed -eq 0 ] && [ $pe_signed -eq 0 ] \ + && [ $ima_modsig -eq 0 ]; then log_fail "$succeed_msg (missing sig)" fi @@ -107,7 +133,8 @@ kexec_file_load_test() log_fail "$succeed_msg (missing PE sig)" fi - if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ]; then + if [ $ima_sig_required -eq 1 ] && [ $ima_signed -eq 0 ] \ + && [ $ima_modsig -eq 0 ]; then log_fail "$succeed_msg (missing IMA sig)" fi @@ -204,5 +231,8 @@ pe_signed=$? check_for_imasig ima_signed=$? +check_for_modsig +ima_modsig=$? 
+ # Test loading the kernel image via kexec_file_load syscall kexec_file_load_test diff --git a/tools/testing/selftests/kselftest_module.sh b/tools/testing/selftests/kselftest/module.sh index 18e1c7992d30..fb4733faff12 100755 --- a/tools/testing/selftests/kselftest_module.sh +++ b/tools/testing/selftests/kselftest/module.sh @@ -9,7 +9,7 @@ # # #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ -# $(dirname $0)/../kselftest_module.sh "description" module_name +# $(dirname $0)/../kselftest/module.sh "description" module_name # # Example: tools/testing/selftests/lib/printf.sh diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl index ec7e48118183..31f7c2a0a8bd 100755 --- a/tools/testing/selftests/kselftest/prefix.pl +++ b/tools/testing/selftests/kselftest/prefix.pl @@ -3,6 +3,7 @@ # Prefix all lines with "# ", unbuffered. Command being piped in may need # to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd". use strict; +use IO::Handle; binmode STDIN; binmode STDOUT; diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 00c9020bdda8..e84d901f8567 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -3,9 +3,14 @@ # # Runs a set of tests in a given subdirectory. export skip_rc=4 +export timeout_rc=124 export logfile=/dev/stdout export per_test_logging= +# Defaults for "settings" file fields: +# "timeout" how many seconds to let each test run before failing. +export kselftest_default_timeout=45 + # There isn't a shell-agnostic way to find the path of a sourced file, # so we must rely on BASE_DIR being set to find other tools. if [ -z "$BASE_DIR" ]; then @@ -24,6 +29,16 @@ tap_prefix() fi } +tap_timeout() +{ + # Make sure tests will time out if utility is available. 
+ if [ -x /usr/bin/timeout ] ; then + /usr/bin/timeout "$kselftest_timeout" "$1" + else + "$1" + fi +} + run_one() { DIR="$1" @@ -32,6 +47,18 @@ run_one() BASENAME_TEST=$(basename $TEST) + # Reset any "settings"-file variables. + export kselftest_timeout="$kselftest_default_timeout" + # Load per-test-directory kselftest "settings" file. + settings="$BASE_DIR/$DIR/settings" + if [ -r "$settings" ] ; then + while read line ; do + field=$(echo "$line" | cut -d= -f1) + value=$(echo "$line" | cut -d= -f2-) + eval "kselftest_$field"="$value" + done < "$settings" + fi + TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" if [ ! -x "$TEST" ]; then @@ -44,14 +71,18 @@ run_one() echo "not ok $test_num $TEST_HDR_MSG" else cd `dirname $TEST` > /dev/null - (((((./$BASENAME_TEST 2>&1; echo $? >&3) | + ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) | tap_prefix >&4) 3>&1) | (read xs; exit $xs)) 4>>"$logfile" && echo "ok $test_num $TEST_HDR_MSG") || - (if [ $? -eq $skip_rc ]; then \ + (rc=$?; \ + if [ $rc -eq $skip_rc ]; then \ echo "not ok $test_num $TEST_HDR_MSG # SKIP" + elif [ $rc -eq $timeout_rc ]; then \ + echo "#" + echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT" else - echo "not ok $test_num $TEST_HDR_MSG" + echo "not ok $test_num $TEST_HDR_MSG # exit=$rc" fi) cd - >/dev/null fi @@ -60,7 +91,7 @@ run_one() run_many() { echo "TAP version 13" - DIR=$(basename "$PWD") + DIR="${PWD#${BASE_DIR}/}" test_num=0 total=$(echo "$@" | wc -w) echo "1..$total" diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh index ec304463883c..407af7da7037 100755 --- a/tools/testing/selftests/kselftest_install.sh +++ b/tools/testing/selftests/kselftest_install.sh @@ -6,30 +6,30 @@ # Author: Shuah Khan <shuahkh@osg.samsung.com> # Copyright (C) 2015 Samsung Electronics Co., Ltd. 
-install_loc=`pwd` - main() { - if [ $(basename $install_loc) != "selftests" ]; then + base_dir=`pwd` + install_dir="$base_dir"/kselftest_install + + # Make sure we're in the selftests top-level directory. + if [ $(basename "$base_dir") != "selftests" ]; then echo "$0: Please run it in selftests directory ..." exit 1; fi + + # Only allow installation into an existing location. if [ "$#" -eq 0 ]; then - echo "$0: Installing in default location - $install_loc ..." + echo "$0: Installing in default location - $install_dir ..." elif [ ! -d "$1" ]; then echo "$0: $1 doesn't exist!!" exit 1; else - install_loc=$1 - echo "$0: Installing in specified location - $install_loc ..." + install_dir="$1" + echo "$0: Installing in specified location - $install_dir ..." fi - install_dir=$install_loc/kselftest - -# Create install directory - mkdir -p $install_dir -# Build tests - INSTALL_PATH=$install_dir make install + # Build tests + KSFT_INSTALL_PATH="$install_dir" make install } main "$@" diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index b35da375530a..30072c3f52fb 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,4 +1,5 @@ /s390x/sync_regs_test +/s390x/memop /x86_64/cr4_cpuid_sync_test /x86_64/evmcs_test /x86_64/hyperv_cpuid @@ -9,8 +10,10 @@ /x86_64/state_test /x86_64/sync_regs_test /x86_64/vmx_close_while_nested_test +/x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test +/x86_64/xss_msr_test /clear_dirty_log_test /dirty_log_test /kvm_create_max_vcpus diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ba7849751989..d91c53b726e6 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -7,10 +7,10 @@ top_srcdir = ../../../.. 
KSFT_KHDR_INSTALL := 1 UNAME_M := $(shell uname -m) -LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c -LIBKVM_aarch64 = lib/aarch64/processor.c -LIBKVM_s390x = lib/s390x/processor.c +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c +LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test @@ -22,8 +22,11 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test +TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus @@ -32,7 +35,10 @@ TEST_GEN_PROGS_aarch64 += clear_dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus +TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/sync_regs_test +TEST_GEN_PROGS_s390x += s390x/resets +TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) @@ -41,12 +47,14 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include +LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -fno-stack-protector 
-fno-PIE -I$(LINUX_TOOL_INCLUDE) \ - -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(UNAME_M) -I.. + -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ + -I$(<D) -Iinclude/$(UNAME_M) -I.. no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie) # On s390, build the testcases KVM-enabled pgste-option = $(call try-run, echo 'int main() { return 0; }' | \ diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index ceb52b952637..5614222a6628 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -19,15 +19,13 @@ #include "kvm_util.h" #include "processor.h" -#define DEBUG printf - #define VCPU_ID 1 /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 -/* Default guest test memory offset, 1G */ -#define DEFAULT_GUEST_TEST_MEM 0x40000000 +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 /* How many pages to dirty for each guest loop */ #define TEST_PAGES_PER_LOOP 1024 @@ -38,6 +36,27 @@ /* Interval for each host loop (ms) */ #define TEST_HOST_LOOP_INTERVAL 10UL +/* Dirty bitmaps are always little endian, so we need to swap on big endian */ +#if defined(__s390x__) +# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) +# define test_bit_le(nr, addr) \ + test_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define set_bit_le(nr, addr) \ + set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define clear_bit_le(nr, addr) \ + clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define test_and_set_bit_le(nr, addr) \ + test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +# define test_and_clear_bit_le(nr, addr) \ + test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr) +#else +# define test_bit_le test_bit +# define set_bit_le set_bit +# define clear_bit_le clear_bit +# define 
test_and_set_bit_le test_and_set_bit +# define test_and_clear_bit_le test_and_clear_bit +#endif + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -69,11 +88,23 @@ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; */ static void guest_code(void) { + uint64_t addr; int i; + /* + * On s390x, all pages of a 1M segment are initially marked as dirty + * when a page of the segment is written to for the very first time. + * To compensate this specialty in this test, we need to touch all + * pages during the first iteration. + */ + for (i = 0; i < guest_num_pages; i++) { + addr = guest_test_virt_mem + i * guest_page_size; + *(uint64_t *)addr = READ_ONCE(iteration); + } + while (true) { for (i = 0; i < TEST_PAGES_PER_LOOP; i++) { - uint64_t addr = guest_test_virt_mem; + addr = guest_test_virt_mem; addr += (READ_ONCE(random_array[i]) % guest_num_pages) * guest_page_size; addr &= ~(host_page_size - 1); @@ -158,15 +189,15 @@ static void vm_dirty_log_verify(unsigned long *bmap) value_ptr = host_test_mem + page * host_page_size; /* If this is a special page that we were tracking... 
*/ - if (test_and_clear_bit(page, host_bmap_track)) { + if (test_and_clear_bit_le(page, host_bmap_track)) { host_track_next_count++; - TEST_ASSERT(test_bit(page, bmap), + TEST_ASSERT(test_bit_le(page, bmap), "Page %"PRIu64" should have its dirty bit " "set in this iteration but it is missing", page); } - if (test_bit(page, bmap)) { + if (test_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto @@ -209,21 +240,19 @@ static void vm_dirty_log_verify(unsigned long *bmap) * should report its dirtyness in the * next run */ - set_bit(page, host_bmap_track); + set_bit_le(page, host_bmap_track); } } } } static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, - uint64_t extra_mem_pages, void *guest_code, - unsigned long type) + uint64_t extra_mem_pages, void *guest_code) { struct kvm_vm *vm; uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, - O_RDWR, type); + vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); @@ -232,85 +261,61 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, return vm; } +#define DIRTY_MEM_BITS 30 /* 1G */ +#define PAGE_SHIFT_4K 12 + static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { - unsigned int guest_pa_bits, guest_page_shift; pthread_t vcpu_thread; struct kvm_vm *vm; - uint64_t max_gfn; unsigned long *bmap; - unsigned long type = 0; - - switch (mode) { - case VM_MODE_P52V48_4K: - guest_pa_bits = 52; - guest_page_shift = 12; - break; - case VM_MODE_P52V48_64K: - guest_pa_bits = 52; - guest_page_shift = 16; - break; - case VM_MODE_P48V48_4K: - guest_pa_bits = 48; - guest_page_shift = 12; - break; - case VM_MODE_P48V48_64K: - guest_pa_bits = 48; - guest_page_shift = 16; - break; - case VM_MODE_P40V48_4K: - guest_pa_bits = 40; 
- guest_page_shift = 12; - break; - case VM_MODE_P40V48_64K: - guest_pa_bits = 40; - guest_page_shift = 16; - break; - default: - TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); - } - DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - -#ifdef __x86_64__ /* - * FIXME - * The x86_64 kvm selftests framework currently only supports a - * single PML4 which restricts the number of physical address - * bits we can change to 39. + * We reserve page table for 2 times of extra dirty mem which + * will definitely cover the original (1G+) test range. Here + * we do the calculation with 4K page size which is the + * smallest so the page number will be enough for all archs + * (e.g., 64K page size guest will need even less memory for + * page tables). */ - guest_pa_bits = 39; -#endif -#ifdef __aarch64__ - if (guest_pa_bits != 40) - type = KVM_VM_TYPE_ARM_IPA_SIZE(guest_pa_bits); -#endif - max_gfn = (1ul << (guest_pa_bits - guest_page_shift)) - 1; - guest_page_size = (1ul << guest_page_shift); + vm = create_vm(mode, VCPU_ID, + 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), + guest_code); + + guest_page_size = vm_get_page_size(vm); /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. 
*/ - guest_num_pages = (1ul << (30 - guest_page_shift)) + 16; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - + vm_get_page_shift(vm))) + 16; +#ifdef __s390x__ + /* Round up to multiple of 1M (segment size) */ + guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL; +#endif host_page_size = getpagesize(); host_num_pages = (guest_num_pages * guest_page_size) / host_page_size + !!((guest_num_pages * guest_page_size) % host_page_size); if (!phys_offset) { - guest_test_phys_mem = (max_gfn - guest_num_pages) * guest_page_size; + guest_test_phys_mem = (vm_get_max_gfn(vm) - + guest_num_pages) * guest_page_size; guest_test_phys_mem &= ~(host_page_size - 1); } else { guest_test_phys_mem = phys_offset; } +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); - vm = create_vm(mode, VCPU_ID, guest_num_pages, guest_code, type); - #ifdef USE_CLEAR_DIRTY_LOG struct kvm_enable_cap cap = {}; @@ -337,7 +342,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); #endif #ifdef __aarch64__ - ucall_init(vm, UCALL_MMIO, NULL); + ucall_init(vm, NULL); #endif /* Export the shared variables to the guest */ @@ -440,7 +445,7 @@ int main(int argc, char *argv[]) #endif #ifdef __x86_64__ - vm_guest_mode_params_init(VM_MODE_P52V48_4K, true, true); + vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true); #endif #ifdef __aarch64__ vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); @@ -454,6 +459,9 @@ int main(int argc, char *argv[]) vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true); } #endif +#ifdef __s390x__ + vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true); +#endif while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { switch (opt) { diff --git a/tools/testing/selftests/kvm/include/evmcs.h 
b/tools/testing/selftests/kvm/include/evmcs.h index 4059014d93ea..4912d23844bc 100644 --- a/tools/testing/selftests/kvm/include/evmcs.h +++ b/tools/testing/selftests/kvm/include/evmcs.h @@ -220,6 +220,8 @@ struct hv_enlightened_vmcs { struct hv_enlightened_vmcs *current_evmcs; struct hv_vp_assist_page *current_vp_assist; +int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id); + static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) { u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) | diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index e0e66b115ef2..ae0d14c2540a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -24,6 +24,12 @@ struct kvm_vm; typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +#ifndef NDEBUG +#define DEBUG(...) printf(__VA_ARGS__); +#else +#define DEBUG(...) 
+#endif + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 @@ -38,11 +44,14 @@ enum vm_guest_mode { VM_MODE_P48V48_64K, VM_MODE_P40V48_4K, VM_MODE_P40V48_64K, + VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */ NUM_VM_MODES, }; -#ifdef __aarch64__ +#if defined(__aarch64__) #define VM_MODE_DEFAULT VM_MODE_P40V48_4K +#elif defined(__x86_64__) +#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K #else #define VM_MODE_DEFAULT VM_MODE_P52V48_4K #endif @@ -60,8 +69,7 @@ int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type); +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); @@ -117,6 +125,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_fpu *fpu); +void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); +void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); #ifdef __KVM_HAVE_VCPU_EVENTS void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_events *events); @@ -146,6 +160,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); bool vm_is_unrestricted_guest(struct kvm_vm *vm); +unsigned int vm_get_page_size(struct kvm_vm *vm); +unsigned int vm_get_page_shift(struct kvm_vm *vm); +unsigned int vm_get_max_gfn(struct kvm_vm *vm); + struct kvm_userspace_memory_region * 
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); @@ -165,12 +183,6 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) -/* ucall implementation types */ -typedef enum { - UCALL_PIO, - UCALL_MMIO, -} ucall_type_t; - /* Common ucalls */ enum { UCALL_NONE, @@ -186,7 +198,7 @@ struct ucall { uint64_t args[UCALL_MAX_ARGS]; }; -void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg); +void ucall_init(struct kvm_vm *vm, void *arg); void ucall_uninit(struct kvm_vm *vm); void ucall(uint64_t cmd, int nargs, ...); uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 80d19740d2dc..7428513a4c68 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -11,6 +11,8 @@ #include <assert.h> #include <stdint.h> +#include <asm/msr-index.h> + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -34,24 +36,24 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) -/* The enum values match the intruction encoding of each register */ -enum x86_register { - RAX = 0, - RCX, - RDX, - RBX, - RSP, - RBP, - RSI, - RDI, - R8, - R9, - R10, - R11, - R12, - R13, - R14, - R15, +/* General Registers in 64-Bit Mode */ +struct gpr64_regs { + u64 rax; + u64 rcx; + u64 rdx; + u64 rbx; + u64 rsp; + u64 rbp; + u64 rsi; + u64 rdi; + u64 r8; + u64 r9; + u64 r10; + u64 r11; + u64 r12; + u64 r13; + u64 r14; + u64 r15; }; struct desc64 { @@ -218,20 +220,20 @@ static inline void set_cr4(uint64_t val) __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); } -static inline uint64_t get_gdt_base(void) +static inline struct desc_ptr get_gdt(void) { struct desc_ptr gdt; __asm__ __volatile__("sgdt %[gdt]" : /* output */ [gdt]"=m"(gdt)); - return gdt.address; + 
return gdt; } -static inline uint64_t get_idt_base(void) +static inline struct desc_ptr get_idt(void) { struct desc_ptr idt; __asm__ __volatile__("sidt %[idt]" : /* output */ [idt]"=m"(idt)); - return idt.address; + return idt; } #define SET_XMM(__var, __xmm) \ @@ -308,6 +310,8 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state); +struct kvm_msr_list *kvm_get_msr_index_list(void); + struct kvm_cpuid2 *kvm_get_supported_cpuid(void); void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); @@ -322,9 +326,15 @@ kvm_get_supported_cpuid_entry(uint32_t function) } uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); +int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value); void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, uint64_t msr_value); +uint32_t kvm_get_cpuid_max_basic(void); +uint32_t kvm_get_cpuid_max_extended(void); +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); + /* * Basic CPU control in CR0 */ @@ -340,444 +350,6 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, #define X86_CR0_CD (1UL<<30) /* Cache Disable */ #define X86_CR0_PG (1UL<<31) /* Paging */ -/* - * CPU model specific register (MSR) numbers. 
- */ - -/* x86-64 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ -#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ - -/* EFER bits: */ -#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */ -#define EFER_LME (1<<8) /* Long mode enable */ -#define EFER_LMA (1<<10) /* Long mode active (read-only) */ -#define EFER_NX (1<<11) /* No execute enable */ -#define EFER_SVME (1<<12) /* Enable virtualization */ -#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */ -#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */ - -/* Intel MSRs. Some also available on other CPUs */ - -#define MSR_PPIN_CTL 0x0000004e -#define MSR_PPIN 0x0000004f - -#define MSR_IA32_PERFCTR0 0x000000c1 -#define MSR_IA32_PERFCTR1 0x000000c2 -#define MSR_FSB_FREQ 0x000000cd -#define MSR_PLATFORM_INFO 0x000000ce -#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 -#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) - -#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 -#define NHM_C3_AUTO_DEMOTE (1UL << 25) -#define NHM_C1_AUTO_DEMOTE (1UL << 26) -#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - -#define MSR_MTRRcap 0x000000fe -#define MSR_IA32_BBL_CR_CTL 0x00000119 -#define MSR_IA32_BBL_CR_CTL3 0x0000011e - -#define MSR_IA32_SYSENTER_CS 0x00000174 -#define MSR_IA32_SYSENTER_ESP 0x00000175 -#define MSR_IA32_SYSENTER_EIP 0x00000176 - -#define MSR_IA32_MCG_CAP 0x00000179 -#define MSR_IA32_MCG_STATUS 0x0000017a -#define MSR_IA32_MCG_CTL 0x0000017b -#define 
MSR_IA32_MCG_EXT_CTL 0x000004d0 - -#define MSR_OFFCORE_RSP_0 0x000001a6 -#define MSR_OFFCORE_RSP_1 0x000001a7 -#define MSR_TURBO_RATIO_LIMIT 0x000001ad -#define MSR_TURBO_RATIO_LIMIT1 0x000001ae -#define MSR_TURBO_RATIO_LIMIT2 0x000001af - -#define MSR_LBR_SELECT 0x000001c8 -#define MSR_LBR_TOS 0x000001c9 -#define MSR_LBR_NHM_FROM 0x00000680 -#define MSR_LBR_NHM_TO 0x000006c0 -#define MSR_LBR_CORE_FROM 0x00000040 -#define MSR_LBR_CORE_TO 0x00000060 - -#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ -#define LBR_INFO_MISPRED BIT_ULL(63) -#define LBR_INFO_IN_TX BIT_ULL(62) -#define LBR_INFO_ABORT BIT_ULL(61) -#define LBR_INFO_CYCLES 0xffff - -#define MSR_IA32_PEBS_ENABLE 0x000003f1 -#define MSR_IA32_DS_AREA 0x00000600 -#define MSR_IA32_PERF_CAPABILITIES 0x00000345 -#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 - -#define MSR_IA32_RTIT_CTL 0x00000570 -#define MSR_IA32_RTIT_STATUS 0x00000571 -#define MSR_IA32_RTIT_ADDR0_A 0x00000580 -#define MSR_IA32_RTIT_ADDR0_B 0x00000581 -#define MSR_IA32_RTIT_ADDR1_A 0x00000582 -#define MSR_IA32_RTIT_ADDR1_B 0x00000583 -#define MSR_IA32_RTIT_ADDR2_A 0x00000584 -#define MSR_IA32_RTIT_ADDR2_B 0x00000585 -#define MSR_IA32_RTIT_ADDR3_A 0x00000586 -#define MSR_IA32_RTIT_ADDR3_B 0x00000587 -#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 -#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 -#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 - -#define MSR_MTRRfix64K_00000 0x00000250 -#define MSR_MTRRfix16K_80000 0x00000258 -#define MSR_MTRRfix16K_A0000 0x00000259 -#define MSR_MTRRfix4K_C0000 0x00000268 -#define MSR_MTRRfix4K_C8000 0x00000269 -#define MSR_MTRRfix4K_D0000 0x0000026a -#define MSR_MTRRfix4K_D8000 0x0000026b -#define MSR_MTRRfix4K_E0000 0x0000026c -#define MSR_MTRRfix4K_E8000 0x0000026d -#define MSR_MTRRfix4K_F0000 0x0000026e -#define MSR_MTRRfix4K_F8000 0x0000026f -#define MSR_MTRRdefType 0x000002ff - -#define MSR_IA32_CR_PAT 0x00000277 - -#define MSR_IA32_DEBUGCTLMSR 0x000001d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x000001db -#define 
MSR_IA32_LASTBRANCHTOIP 0x000001dc -#define MSR_IA32_LASTINTFROMIP 0x000001dd -#define MSR_IA32_LASTINTTOIP 0x000001de - -/* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ -#define DEBUGCTLMSR_BTF_SHIFT 1 -#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ -#define DEBUGCTLMSR_TR (1UL << 6) -#define DEBUGCTLMSR_BTS (1UL << 7) -#define DEBUGCTLMSR_BTINT (1UL << 8) -#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) -#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) -#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) -#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 -#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) - -#define MSR_PEBS_FRONTEND 0x000003f7 - -#define MSR_IA32_POWER_CTL 0x000001fc - -#define MSR_IA32_MC0_CTL 0x00000400 -#define MSR_IA32_MC0_STATUS 0x00000401 -#define MSR_IA32_MC0_ADDR 0x00000402 -#define MSR_IA32_MC0_MISC 0x00000403 - -/* C-state Residency Counters */ -#define MSR_PKG_C3_RESIDENCY 0x000003f8 -#define MSR_PKG_C6_RESIDENCY 0x000003f9 -#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa -#define MSR_PKG_C7_RESIDENCY 0x000003fa -#define MSR_CORE_C3_RESIDENCY 0x000003fc -#define MSR_CORE_C6_RESIDENCY 0x000003fd -#define MSR_CORE_C7_RESIDENCY 0x000003fe -#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff -#define MSR_PKG_C2_RESIDENCY 0x0000060d -#define MSR_PKG_C8_RESIDENCY 0x00000630 -#define MSR_PKG_C9_RESIDENCY 0x00000631 -#define MSR_PKG_C10_RESIDENCY 0x00000632 - -/* Interrupt Response Limit */ -#define MSR_PKGC3_IRTL 0x0000060a -#define MSR_PKGC6_IRTL 0x0000060b -#define MSR_PKGC7_IRTL 0x0000060c -#define MSR_PKGC8_IRTL 0x00000633 -#define MSR_PKGC9_IRTL 0x00000634 -#define MSR_PKGC10_IRTL 0x00000635 - -/* Run Time Average Power Limiting (RAPL) Interface */ - -#define MSR_RAPL_POWER_UNIT 0x00000606 - -#define MSR_PKG_POWER_LIMIT 0x00000610 -#define MSR_PKG_ENERGY_STATUS 0x00000611 -#define MSR_PKG_PERF_STATUS 0x00000613 -#define MSR_PKG_POWER_INFO 0x00000614 - -#define 
MSR_DRAM_POWER_LIMIT 0x00000618 -#define MSR_DRAM_ENERGY_STATUS 0x00000619 -#define MSR_DRAM_PERF_STATUS 0x0000061b -#define MSR_DRAM_POWER_INFO 0x0000061c - -#define MSR_PP0_POWER_LIMIT 0x00000638 -#define MSR_PP0_ENERGY_STATUS 0x00000639 -#define MSR_PP0_POLICY 0x0000063a -#define MSR_PP0_PERF_STATUS 0x0000063b - -#define MSR_PP1_POWER_LIMIT 0x00000640 -#define MSR_PP1_ENERGY_STATUS 0x00000641 -#define MSR_PP1_POLICY 0x00000642 - -/* Config TDP MSRs */ -#define MSR_CONFIG_TDP_NOMINAL 0x00000648 -#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 -#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A -#define MSR_CONFIG_TDP_CONTROL 0x0000064B -#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C - -#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D - -#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 -#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 -#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A -#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B - -#define MSR_CORE_C1_RES 0x00000660 -#define MSR_MODULE_C6_RES_MS 0x00000664 - -#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 -#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 - -#define MSR_ATOM_CORE_RATIOS 0x0000066a -#define MSR_ATOM_CORE_VIDS 0x0000066b -#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c -#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d - - -#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 -#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 -#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 - -/* Hardware P state interface */ -#define MSR_PPERF 0x0000064e -#define MSR_PERF_LIMIT_REASONS 0x0000064f -#define MSR_PM_ENABLE 0x00000770 -#define MSR_HWP_CAPABILITIES 0x00000771 -#define MSR_HWP_REQUEST_PKG 0x00000772 -#define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 -#define MSR_HWP_STATUS 0x00000777 - -/* CPUID.6.EAX */ -#define HWP_BASE_BIT (1<<7) -#define HWP_NOTIFICATIONS_BIT (1<<8) -#define HWP_ACTIVITY_WINDOW_BIT (1<<9) -#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) -#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) - -/* 
IA32_HWP_CAPABILITIES */ -#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) -#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) -#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) -#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) - -/* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) -#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) -#define HWP_EPP_PERFORMANCE 0x00 -#define HWP_EPP_BALANCE_PERFORMANCE 0x80 -#define HWP_EPP_BALANCE_POWERSAVE 0xC0 -#define HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) - -/* IA32_HWP_STATUS */ -#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) -#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) - -/* IA32_HWP_INTERRUPT */ -#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) -#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) - -#define MSR_AMD64_MC0_MASK 0xc0010044 - -#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) -#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) -#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) -#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) - -#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) - -/* These are consecutive and not in the normal 4er MCE bank block */ -#define MSR_IA32_MC0_CTL2 0x00000280 -#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) - -#define MSR_P6_PERFCTR0 0x000000c1 -#define MSR_P6_PERFCTR1 0x000000c2 -#define MSR_P6_EVNTSEL0 0x00000186 -#define MSR_P6_EVNTSEL1 0x00000187 - -#define MSR_KNC_PERFCTR0 0x00000020 -#define MSR_KNC_PERFCTR1 0x00000021 -#define MSR_KNC_EVNTSEL0 0x00000028 -#define MSR_KNC_EVNTSEL1 0x00000029 - -/* Alternative perfctr range with full access. */ -#define MSR_IA32_PMC0 0x000004c1 - -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. 
*/ - -#define MSR_AMD64_PATCH_LEVEL 0x0000008b -#define MSR_AMD64_TSC_RATIO 0xc0000104 -#define MSR_AMD64_NB_CFG 0xc001001f -#define MSR_AMD64_PATCH_LOADER 0xc0010020 -#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 -#define MSR_AMD64_OSVW_STATUS 0xc0010141 -#define MSR_AMD64_LS_CFG 0xc0011020 -#define MSR_AMD64_DC_CFG 0xc0011022 -#define MSR_AMD64_BU_CFG2 0xc001102a -#define MSR_AMD64_IBSFETCHCTL 0xc0011030 -#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 -#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 -#define MSR_AMD64_IBSFETCH_REG_COUNT 3 -#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) -#define MSR_AMD64_IBSOPCTL 0xc0011033 -#define MSR_AMD64_IBSOPRIP 0xc0011034 -#define MSR_AMD64_IBSOPDATA 0xc0011035 -#define MSR_AMD64_IBSOPDATA2 0xc0011036 -#define MSR_AMD64_IBSOPDATA3 0xc0011037 -#define MSR_AMD64_IBSDCLINAD 0xc0011038 -#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 -#define MSR_AMD64_IBSOP_REG_COUNT 7 -#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) -#define MSR_AMD64_IBSCTL 0xc001103a -#define MSR_AMD64_IBSBRTARGET 0xc001103b -#define MSR_AMD64_IBSOPDATA4 0xc001103d -#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ -#define MSR_AMD64_SEV 0xc0010131 -#define MSR_AMD64_SEV_ENABLED_BIT 0 -#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) - -/* Fam 17h MSRs */ -#define MSR_F17H_IRPERF 0xc00000e9 - -/* Fam 16h MSRs */ -#define MSR_F16H_L2I_PERF_CTL 0xc0010230 -#define MSR_F16H_L2I_PERF_CTR 0xc0010231 -#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 -#define MSR_F16H_DR2_ADDR_MASK 0xc001101a -#define MSR_F16H_DR3_ADDR_MASK 0xc001101b -#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 - -/* Fam 15h MSRs */ -#define MSR_F15H_PERF_CTL 0xc0010200 -#define MSR_F15H_PERF_CTR 0xc0010201 -#define MSR_F15H_NB_PERF_CTL 0xc0010240 -#define MSR_F15H_NB_PERF_CTR 0xc0010241 -#define MSR_F15H_PTSC 0xc0010280 -#define MSR_F15H_IC_CFG 0xc0011021 - -/* Fam 10h MSRs */ -#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 
-#define FAM10H_MMIO_CONF_ENABLE (1<<0) -#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf -#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 -#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL -#define FAM10H_MMIO_CONF_BASE_SHIFT 20 -#define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) - -/* K8 MSRs */ -#define MSR_K8_TOP_MEM1 0xc001001a -#define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) -#define MSR_K8_INT_PENDING_MSG 0xc0010055 -/* C1E active bits in int pending message */ -#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 -#define MSR_K8_TSEG_ADDR 0xc0010112 -#define MSR_K8_TSEG_MASK 0xc0010113 -#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ -#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ -#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ - -/* K7 MSRs */ -#define MSR_K7_EVNTSEL0 0xc0010000 -#define MSR_K7_PERFCTR0 0xc0010004 -#define MSR_K7_EVNTSEL1 0xc0010001 -#define MSR_K7_PERFCTR1 0xc0010005 -#define MSR_K7_EVNTSEL2 0xc0010002 -#define MSR_K7_PERFCTR2 0xc0010006 -#define MSR_K7_EVNTSEL3 0xc0010003 -#define MSR_K7_PERFCTR3 0xc0010007 -#define MSR_K7_CLK_CTL 0xc001001b -#define MSR_K7_HWCR 0xc0010015 -#define MSR_K7_HWCR_SMMLOCK_BIT 0 -#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) -#define MSR_K7_FID_VID_CTL 0xc0010041 -#define MSR_K7_FID_VID_STATUS 0xc0010042 - -/* K6 MSRs */ -#define MSR_K6_WHCR 0xc0000082 -#define MSR_K6_UWCCR 0xc0000085 -#define MSR_K6_EPMR 0xc0000086 -#define MSR_K6_PSOR 0xc0000087 -#define MSR_K6_PFIR 0xc0000088 - -/* Centaur-Hauls/IDT defined MSRs. 
*/ -#define MSR_IDT_FCR1 0x00000107 -#define MSR_IDT_FCR2 0x00000108 -#define MSR_IDT_FCR3 0x00000109 -#define MSR_IDT_FCR4 0x0000010a - -#define MSR_IDT_MCR0 0x00000110 -#define MSR_IDT_MCR1 0x00000111 -#define MSR_IDT_MCR2 0x00000112 -#define MSR_IDT_MCR3 0x00000113 -#define MSR_IDT_MCR4 0x00000114 -#define MSR_IDT_MCR5 0x00000115 -#define MSR_IDT_MCR6 0x00000116 -#define MSR_IDT_MCR7 0x00000117 -#define MSR_IDT_MCR_CTRL 0x00000120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x00001107 -#define MSR_VIA_LONGHAUL 0x0000110a -#define MSR_VIA_RNG 0x0000110b -#define MSR_VIA_BCR2 0x00001147 - -/* Transmeta defined MSRs */ -#define MSR_TMTA_LONGRUN_CTRL 0x80868010 -#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -#define MSR_TMTA_LRTI_READOUT 0x80868018 -#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a - -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0x00000000 -#define MSR_IA32_P5_MC_TYPE 0x00000001 -#define MSR_IA32_TSC 0x00000010 -#define MSR_IA32_PLATFORM_ID 0x00000017 -#define MSR_IA32_EBL_CR_POWERON 0x0000002a -#define MSR_EBC_FREQUENCY_ID 0x0000002c -#define MSR_SMI_COUNT 0x00000034 -#define MSR_IA32_FEATURE_CONTROL 0x0000003a -#define MSR_IA32_TSC_ADJUST 0x0000003b -#define MSR_IA32_BNDCFGS 0x00000d90 - -#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc - -#define MSR_IA32_XSS 0x00000da0 - -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) -#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) -#define FEATURE_CONTROL_LMCE (1<<20) - -#define MSR_IA32_APICBASE 0x0000001b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - #define APIC_BASE_MSR 0x800 #define X2APIC_ENABLE (1UL << 10) #define APIC_ICR 0x300 @@ -805,288 +377,7 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, #define APIC_VECTOR_MASK 0x000FF #define APIC_ICR2 0x310 -#define MSR_IA32_TSCDEADLINE 0x000006e0 - -#define MSR_IA32_UCODE_WRITE 0x00000079 
-#define MSR_IA32_UCODE_REV 0x0000008b - -#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b -#define MSR_IA32_SMBASE 0x0000009e - -#define MSR_IA32_PERF_STATUS 0x00000198 -#define MSR_IA32_PERF_CTL 0x00000199 -#define INTEL_PERF_CTL_MASK 0xffff -#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 -#define MSR_AMD_PERF_STATUS 0xc0010063 -#define MSR_AMD_PERF_CTL 0xc0010062 - -#define MSR_IA32_MPERF 0x000000e7 -#define MSR_IA32_APERF 0x000000e8 - -#define MSR_IA32_THERM_CONTROL 0x0000019a -#define MSR_IA32_THERM_INTERRUPT 0x0000019b - -#define THERM_INT_HIGH_ENABLE (1 << 0) -#define THERM_INT_LOW_ENABLE (1 << 1) -#define THERM_INT_PLN_ENABLE (1 << 24) - -#define MSR_IA32_THERM_STATUS 0x0000019c - -#define THERM_STATUS_PROCHOT (1 << 0) -#define THERM_STATUS_POWER_LIMIT (1 << 10) - -#define MSR_THERM2_CTL 0x0000019d - -#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) - -#define MSR_IA32_MISC_ENABLE 0x000001a0 - -#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 - -#define MSR_MISC_FEATURE_CONTROL 0x000001a4 -#define MSR_MISC_PWR_MGMT 0x000001aa - -#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 -#define ENERGY_PERF_BIAS_PERFORMANCE 0 -#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 -#define ENERGY_PERF_BIAS_NORMAL 6 -#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 -#define ENERGY_PERF_BIAS_POWERSAVE 15 - -#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 - -#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) -#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) - -#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 - -#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) -#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) -#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) - -/* Thermal Thresholds Support */ -#define THERM_INT_THRESHOLD0_ENABLE (1 << 15) -#define THERM_SHIFT_THRESHOLD0 8 -#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) -#define THERM_INT_THRESHOLD1_ENABLE (1 << 23) -#define THERM_SHIFT_THRESHOLD1 16 -#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) -#define 
THERM_STATUS_THRESHOLD0 (1 << 6) -#define THERM_LOG_THRESHOLD0 (1 << 7) -#define THERM_STATUS_THRESHOLD1 (1 << 8) -#define THERM_LOG_THRESHOLD1 (1 << 9) - -/* MISC_ENABLE bits: architectural */ -#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 -#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) -#define MSR_IA32_MISC_ENABLE_TCC_BIT 1 -#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) -#define MSR_IA32_MISC_ENABLE_EMON_BIT 7 -#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) -#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 -#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) -#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 -#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) -#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 -#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) -#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 -#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 -#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) -#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 -#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 -#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) - -/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ -#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 -#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) -#define MSR_IA32_MISC_ENABLE_TM1_BIT 3 -#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) -#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 -#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 
MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 -#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 -#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) -#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 -#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_FERR_BIT 10 -#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) -#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 -#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) -#define MSR_IA32_MISC_ENABLE_TM2_BIT 13 -#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) -#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 -#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 -#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) -#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 -#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) -#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 -#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 -#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) -#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 -#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) - -/* MISC_FEATURES_ENABLES non-architectural features */ -#define MSR_MISC_FEATURES_ENABLES 0x00000140 - -#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 -#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT 
BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) -#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 - -#define MSR_IA32_TSC_DEADLINE 0x000006E0 - -/* P4/Xeon+ specific */ -#define MSR_IA32_MCG_EAX 0x00000180 -#define MSR_IA32_MCG_EBX 0x00000181 -#define MSR_IA32_MCG_ECX 0x00000182 -#define MSR_IA32_MCG_EDX 0x00000183 -#define MSR_IA32_MCG_ESI 0x00000184 -#define MSR_IA32_MCG_EDI 0x00000185 -#define MSR_IA32_MCG_EBP 0x00000186 -#define MSR_IA32_MCG_ESP 0x00000187 -#define MSR_IA32_MCG_EFLAGS 0x00000188 -#define MSR_IA32_MCG_EIP 0x00000189 -#define MSR_IA32_MCG_RESERVED 0x0000018a - -/* Pentium IV performance counter MSRs */ -#define MSR_P4_BPU_PERFCTR0 0x00000300 -#define MSR_P4_BPU_PERFCTR1 0x00000301 -#define MSR_P4_BPU_PERFCTR2 0x00000302 -#define MSR_P4_BPU_PERFCTR3 0x00000303 -#define MSR_P4_MS_PERFCTR0 0x00000304 -#define MSR_P4_MS_PERFCTR1 0x00000305 -#define MSR_P4_MS_PERFCTR2 0x00000306 -#define MSR_P4_MS_PERFCTR3 0x00000307 -#define MSR_P4_FLAME_PERFCTR0 0x00000308 -#define MSR_P4_FLAME_PERFCTR1 0x00000309 -#define MSR_P4_FLAME_PERFCTR2 0x0000030a -#define MSR_P4_FLAME_PERFCTR3 0x0000030b -#define MSR_P4_IQ_PERFCTR0 0x0000030c -#define MSR_P4_IQ_PERFCTR1 0x0000030d -#define MSR_P4_IQ_PERFCTR2 0x0000030e -#define MSR_P4_IQ_PERFCTR3 0x0000030f -#define MSR_P4_IQ_PERFCTR4 0x00000310 -#define MSR_P4_IQ_PERFCTR5 0x00000311 -#define MSR_P4_BPU_CCCR0 0x00000360 -#define MSR_P4_BPU_CCCR1 0x00000361 -#define MSR_P4_BPU_CCCR2 0x00000362 -#define MSR_P4_BPU_CCCR3 0x00000363 -#define MSR_P4_MS_CCCR0 0x00000364 -#define MSR_P4_MS_CCCR1 0x00000365 -#define MSR_P4_MS_CCCR2 0x00000366 -#define MSR_P4_MS_CCCR3 0x00000367 -#define MSR_P4_FLAME_CCCR0 0x00000368 -#define MSR_P4_FLAME_CCCR1 0x00000369 -#define MSR_P4_FLAME_CCCR2 0x0000036a -#define MSR_P4_FLAME_CCCR3 0x0000036b -#define MSR_P4_IQ_CCCR0 0x0000036c -#define MSR_P4_IQ_CCCR1 0x0000036d -#define MSR_P4_IQ_CCCR2 0x0000036e -#define MSR_P4_IQ_CCCR3 0x0000036f -#define MSR_P4_IQ_CCCR4 0x00000370 -#define 
MSR_P4_IQ_CCCR5 0x00000371 -#define MSR_P4_ALF_ESCR0 0x000003ca -#define MSR_P4_ALF_ESCR1 0x000003cb -#define MSR_P4_BPU_ESCR0 0x000003b2 -#define MSR_P4_BPU_ESCR1 0x000003b3 -#define MSR_P4_BSU_ESCR0 0x000003a0 -#define MSR_P4_BSU_ESCR1 0x000003a1 -#define MSR_P4_CRU_ESCR0 0x000003b8 -#define MSR_P4_CRU_ESCR1 0x000003b9 -#define MSR_P4_CRU_ESCR2 0x000003cc -#define MSR_P4_CRU_ESCR3 0x000003cd -#define MSR_P4_CRU_ESCR4 0x000003e0 -#define MSR_P4_CRU_ESCR5 0x000003e1 -#define MSR_P4_DAC_ESCR0 0x000003a8 -#define MSR_P4_DAC_ESCR1 0x000003a9 -#define MSR_P4_FIRM_ESCR0 0x000003a4 -#define MSR_P4_FIRM_ESCR1 0x000003a5 -#define MSR_P4_FLAME_ESCR0 0x000003a6 -#define MSR_P4_FLAME_ESCR1 0x000003a7 -#define MSR_P4_FSB_ESCR0 0x000003a2 -#define MSR_P4_FSB_ESCR1 0x000003a3 -#define MSR_P4_IQ_ESCR0 0x000003ba -#define MSR_P4_IQ_ESCR1 0x000003bb -#define MSR_P4_IS_ESCR0 0x000003b4 -#define MSR_P4_IS_ESCR1 0x000003b5 -#define MSR_P4_ITLB_ESCR0 0x000003b6 -#define MSR_P4_ITLB_ESCR1 0x000003b7 -#define MSR_P4_IX_ESCR0 0x000003c8 -#define MSR_P4_IX_ESCR1 0x000003c9 -#define MSR_P4_MOB_ESCR0 0x000003aa -#define MSR_P4_MOB_ESCR1 0x000003ab -#define MSR_P4_MS_ESCR0 0x000003c0 -#define MSR_P4_MS_ESCR1 0x000003c1 -#define MSR_P4_PMH_ESCR0 0x000003ac -#define MSR_P4_PMH_ESCR1 0x000003ad -#define MSR_P4_RAT_ESCR0 0x000003bc -#define MSR_P4_RAT_ESCR1 0x000003bd -#define MSR_P4_SAAT_ESCR0 0x000003ae -#define MSR_P4_SAAT_ESCR1 0x000003af -#define MSR_P4_SSU_ESCR0 0x000003be -#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ - -#define MSR_P4_TBPU_ESCR0 0x000003c2 -#define MSR_P4_TBPU_ESCR1 0x000003c3 -#define MSR_P4_TC_ESCR0 0x000003c4 -#define MSR_P4_TC_ESCR1 0x000003c5 -#define MSR_P4_U2L_ESCR0 0x000003b0 -#define MSR_P4_U2L_ESCR1 0x000003b1 - -#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 - -/* Intel Core-based CPU performance counters */ -#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 -#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a -#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b 
-#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d -#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e -#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f -#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 - -/* Geode defined MSRs */ -#define MSR_GEODE_BUSCONT_CONF0 0x00001900 - -/* Intel VT MSRs */ -#define MSR_IA32_VMX_BASIC 0x00000480 -#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 -#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 -#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 -#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 -#define MSR_IA32_VMX_MISC 0x00000485 -#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 -#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 -#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 -#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 -#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a -#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b -#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c -#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d -#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e -#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f -#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 -#define MSR_IA32_VMX_VMFUNC 0x00000491 - -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F -/* AMD-V MSRs */ - -#define MSR_VM_CR 0xc0010114 -#define MSR_VM_IGNNE 0xc0010115 -#define MSR_VM_HSAVE_PA 0xc0010117 +/* VMX_EPT_VPID_CAP bits */ +#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h new file mode 100644 index 000000000000..f4ea2355dbc2 --- 
/dev/null +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -0,0 +1,297 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * tools/testing/selftests/kvm/include/x86_64/svm.h + * This is a copy of arch/x86/include/asm/svm.h + * + */ + +#ifndef SELFTEST_KVM_SVM_H +#define SELFTEST_KVM_SVM_H + +enum { + INTERCEPT_INTR, + INTERCEPT_NMI, + INTERCEPT_SMI, + INTERCEPT_INIT, + INTERCEPT_VINTR, + INTERCEPT_SELECTIVE_CR0, + INTERCEPT_STORE_IDTR, + INTERCEPT_STORE_GDTR, + INTERCEPT_STORE_LDTR, + INTERCEPT_STORE_TR, + INTERCEPT_LOAD_IDTR, + INTERCEPT_LOAD_GDTR, + INTERCEPT_LOAD_LDTR, + INTERCEPT_LOAD_TR, + INTERCEPT_RDTSC, + INTERCEPT_RDPMC, + INTERCEPT_PUSHF, + INTERCEPT_POPF, + INTERCEPT_CPUID, + INTERCEPT_RSM, + INTERCEPT_IRET, + INTERCEPT_INTn, + INTERCEPT_INVD, + INTERCEPT_PAUSE, + INTERCEPT_HLT, + INTERCEPT_INVLPG, + INTERCEPT_INVLPGA, + INTERCEPT_IOIO_PROT, + INTERCEPT_MSR_PROT, + INTERCEPT_TASK_SWITCH, + INTERCEPT_FERR_FREEZE, + INTERCEPT_SHUTDOWN, + INTERCEPT_VMRUN, + INTERCEPT_VMMCALL, + INTERCEPT_VMLOAD, + INTERCEPT_VMSAVE, + INTERCEPT_STGI, + INTERCEPT_CLGI, + INTERCEPT_SKINIT, + INTERCEPT_RDTSCP, + INTERCEPT_ICEBP, + INTERCEPT_WBINVD, + INTERCEPT_MONITOR, + INTERCEPT_MWAIT, + INTERCEPT_MWAIT_COND, + INTERCEPT_XSETBV, + INTERCEPT_RDPRU, +}; + + +struct __attribute__ ((__packed__)) vmcb_control_area { + u32 intercept_cr; + u32 intercept_dr; + u32 intercept_exceptions; + u64 intercept; + u8 reserved_1[40]; + u16 pause_filter_thresh; + u16 pause_filter_count; + u64 iopm_base_pa; + u64 msrpm_base_pa; + u64 tsc_offset; + u32 asid; + u8 tlb_ctl; + u8 reserved_2[3]; + u32 int_ctl; + u32 int_vector; + u32 int_state; + u8 reserved_3[4]; + u32 exit_code; + u32 exit_code_hi; + u64 exit_info_1; + u64 exit_info_2; + u32 exit_int_info; + u32 exit_int_info_err; + u64 nested_ctl; + u64 avic_vapic_bar; + u8 reserved_4[8]; + u32 event_inj; + u32 event_inj_err; + u64 nested_cr3; + u64 virt_ext; + u32 clean; + u32 reserved_5; + u64 next_rip; + u8 insn_len; + u8 insn_bytes[15]; + 
u64 avic_backing_page; /* Offset 0xe0 */ + u8 reserved_6[8]; /* Offset 0xe8 */ + u64 avic_logical_id; /* Offset 0xf0 */ + u64 avic_physical_id; /* Offset 0xf8 */ + u8 reserved_7[768]; +}; + + +#define TLB_CONTROL_DO_NOTHING 0 +#define TLB_CONTROL_FLUSH_ALL_ASID 1 +#define TLB_CONTROL_FLUSH_ASID 3 +#define TLB_CONTROL_FLUSH_ASID_LOCAL 7 + +#define V_TPR_MASK 0x0f + +#define V_IRQ_SHIFT 8 +#define V_IRQ_MASK (1 << V_IRQ_SHIFT) + +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + +#define V_INTR_PRIO_SHIFT 16 +#define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) + +#define V_IGN_TPR_SHIFT 20 +#define V_IGN_TPR_MASK (1 << V_IGN_TPR_SHIFT) + +#define V_INTR_MASKING_SHIFT 24 +#define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) + +#define V_GIF_ENABLE_SHIFT 25 +#define V_GIF_ENABLE_MASK (1 << V_GIF_ENABLE_SHIFT) + +#define AVIC_ENABLE_SHIFT 31 +#define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT) + +#define LBR_CTL_ENABLE_MASK BIT_ULL(0) +#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1) + +#define SVM_INTERRUPT_SHADOW_MASK 1 + +#define SVM_IOIO_STR_SHIFT 2 +#define SVM_IOIO_REP_SHIFT 3 +#define SVM_IOIO_SIZE_SHIFT 4 +#define SVM_IOIO_ASIZE_SHIFT 7 + +#define SVM_IOIO_TYPE_MASK 1 +#define SVM_IOIO_STR_MASK (1 << SVM_IOIO_STR_SHIFT) +#define SVM_IOIO_REP_MASK (1 << SVM_IOIO_REP_SHIFT) +#define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) +#define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) + +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + +#define SVM_NESTED_CTL_NP_ENABLE BIT(0) +#define SVM_NESTED_CTL_SEV_ENABLE BIT(1) + +struct __attribute__ ((__packed__)) vmcb_seg { + u16 selector; + u16 attrib; + u32 limit; + u64 base; +}; + +struct __attribute__ ((__packed__)) vmcb_save_area { + struct vmcb_seg es; + struct vmcb_seg cs; + struct vmcb_seg ss; + struct vmcb_seg ds; + struct vmcb_seg fs; + struct vmcb_seg gs; + struct vmcb_seg gdtr; + struct vmcb_seg ldtr; + struct 
vmcb_seg idtr; + struct vmcb_seg tr; + u8 reserved_1[43]; + u8 cpl; + u8 reserved_2[4]; + u64 efer; + u8 reserved_3[112]; + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u8 reserved_4[88]; + u64 rsp; + u8 reserved_5[24]; + u64 rax; + u64 star; + u64 lstar; + u64 cstar; + u64 sfmask; + u64 kernel_gs_base; + u64 sysenter_cs; + u64 sysenter_esp; + u64 sysenter_eip; + u64 cr2; + u8 reserved_6[32]; + u64 g_pat; + u64 dbgctl; + u64 br_from; + u64 br_to; + u64 last_excp_from; + u64 last_excp_to; +}; + +struct __attribute__ ((__packed__)) vmcb { + struct vmcb_control_area control; + struct vmcb_save_area save; +}; + +#define SVM_CPUID_FUNC 0x8000000a + +#define SVM_VM_CR_SVM_DISABLE 4 + +#define SVM_SELECTOR_S_SHIFT 4 +#define SVM_SELECTOR_DPL_SHIFT 5 +#define SVM_SELECTOR_P_SHIFT 7 +#define SVM_SELECTOR_AVL_SHIFT 8 +#define SVM_SELECTOR_L_SHIFT 9 +#define SVM_SELECTOR_DB_SHIFT 10 +#define SVM_SELECTOR_G_SHIFT 11 + +#define SVM_SELECTOR_TYPE_MASK (0xf) +#define SVM_SELECTOR_S_MASK (1 << SVM_SELECTOR_S_SHIFT) +#define SVM_SELECTOR_DPL_MASK (3 << SVM_SELECTOR_DPL_SHIFT) +#define SVM_SELECTOR_P_MASK (1 << SVM_SELECTOR_P_SHIFT) +#define SVM_SELECTOR_AVL_MASK (1 << SVM_SELECTOR_AVL_SHIFT) +#define SVM_SELECTOR_L_MASK (1 << SVM_SELECTOR_L_SHIFT) +#define SVM_SELECTOR_DB_MASK (1 << SVM_SELECTOR_DB_SHIFT) +#define SVM_SELECTOR_G_MASK (1 << SVM_SELECTOR_G_SHIFT) + +#define SVM_SELECTOR_WRITE_MASK (1 << 1) +#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK +#define SVM_SELECTOR_CODE_MASK (1 << 3) + +#define INTERCEPT_CR0_READ 0 +#define INTERCEPT_CR3_READ 3 +#define INTERCEPT_CR4_READ 4 +#define INTERCEPT_CR8_READ 8 +#define INTERCEPT_CR0_WRITE (16 + 0) +#define INTERCEPT_CR3_WRITE (16 + 3) +#define INTERCEPT_CR4_WRITE (16 + 4) +#define INTERCEPT_CR8_WRITE (16 + 8) + +#define INTERCEPT_DR0_READ 0 +#define INTERCEPT_DR1_READ 1 +#define INTERCEPT_DR2_READ 2 +#define INTERCEPT_DR3_READ 3 +#define INTERCEPT_DR4_READ 4 +#define INTERCEPT_DR5_READ 5 
+#define INTERCEPT_DR6_READ 6 +#define INTERCEPT_DR7_READ 7 +#define INTERCEPT_DR0_WRITE (16 + 0) +#define INTERCEPT_DR1_WRITE (16 + 1) +#define INTERCEPT_DR2_WRITE (16 + 2) +#define INTERCEPT_DR3_WRITE (16 + 3) +#define INTERCEPT_DR4_WRITE (16 + 4) +#define INTERCEPT_DR5_WRITE (16 + 5) +#define INTERCEPT_DR6_WRITE (16 + 6) +#define INTERCEPT_DR7_WRITE (16 + 7) + +#define SVM_EVTINJ_VEC_MASK 0xff + +#define SVM_EVTINJ_TYPE_SHIFT 8 +#define SVM_EVTINJ_TYPE_MASK (7 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_TYPE_INTR (0 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_NMI (2 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_EXEPT (3 << SVM_EVTINJ_TYPE_SHIFT) +#define SVM_EVTINJ_TYPE_SOFT (4 << SVM_EVTINJ_TYPE_SHIFT) + +#define SVM_EVTINJ_VALID (1 << 31) +#define SVM_EVTINJ_VALID_ERR (1 << 11) + +#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK + +#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR +#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI +#define SVM_EXITINTINFO_TYPE_EXEPT SVM_EVTINJ_TYPE_EXEPT +#define SVM_EXITINTINFO_TYPE_SOFT SVM_EVTINJ_TYPE_SOFT + +#define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID +#define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR + +#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 +#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 +#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 + +#define SVM_EXITINFO_REG_MASK 0x0F + +#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP) + +#endif /* SELFTEST_KVM_SVM_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h new file mode 100644 index 000000000000..cd037917fece --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * tools/testing/selftests/kvm/include/x86_64/svm_utils.h + * Header for nested SVM testing + * + * Copyright (C) 2020, Red Hat, Inc. 
+ */ + +#ifndef SELFTEST_KVM_SVM_UTILS_H +#define SELFTEST_KVM_SVM_UTILS_H + +#include <stdint.h> +#include "svm.h" +#include "processor.h" + +#define CPUID_SVM_BIT 2 +#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT) + +#define SVM_EXIT_VMMCALL 0x081 + +struct svm_test_data { + /* VMCB */ + struct vmcb *vmcb; /* gva */ + void *vmcb_hva; + uint64_t vmcb_gpa; + + /* host state-save area */ + struct vmcb_save_area *save_area; /* gva */ + void *save_area_hva; + uint64_t save_area_gpa; +}; + +struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); +void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); +void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); +void nested_svm_check_supported(void); + +#endif /* SELFTEST_KVM_SVM_UTILS_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 69b17055f63d..3d27069b9ed9 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -18,8 +18,8 @@ /* * Definitions of Primary Processor-Based VM-Execution Controls. 
*/ -#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 -#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_INTR_WINDOW_EXITING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETTING 0x00000008 #define CPU_BASED_HLT_EXITING 0x00000080 #define CPU_BASED_INVLPG_EXITING 0x00000200 #define CPU_BASED_MWAIT_EXITING 0x00000400 @@ -30,7 +30,7 @@ #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 #define CPU_BASED_CR8_STORE_EXITING 0x00100000 #define CPU_BASED_TPR_SHADOW 0x00200000 -#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_NMI_WINDOW_EXITING 0x00400000 #define CPU_BASED_MOV_DR_EXITING 0x00800000 #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 #define CPU_BASED_USE_IO_BITMAPS 0x02000000 @@ -103,7 +103,7 @@ #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 #define EXIT_REASON_TRIPLE_FAULT 2 -#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 #define EXIT_REASON_TASK_SWITCH 9 #define EXIT_REASON_CPUID 10 @@ -569,6 +569,10 @@ struct vmx_pages { void *enlightened_vmcs_hva; uint64_t enlightened_vmcs_gpa; void *enlightened_vmcs; + + void *eptp_hva; + uint64_t eptp_gpa; + void *eptp; }; struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva); @@ -576,4 +580,16 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); +void nested_vmx_check_supported(void); + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot); +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot); +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot); +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); + #endif /* 
SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 231d79e57774..6f38c3dc0d56 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -29,12 +29,9 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus) vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - for (i = 0; i < num_vcpus; i++) { - int vcpu_id = first_vcpu_id + i; - + for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++) /* This asserts that the vCPU was created. */ - vm_vcpu_add(vm, vcpu_id); - } + vm_vcpu_add(vm, i); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 486400a97374..86036a59a668 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -264,6 +264,9 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini case VM_MODE_P52V48_4K: TEST_ASSERT(false, "AArch64 does not support 4K sized pages " "with 52-bit physical address ranges"); + case VM_MODE_PXXV48_4K: + TEST_ASSERT(false, "AArch64 does not support 4K sized pages " + "with ANY-bit physical address ranges"); case VM_MODE_P52V48_64K: tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c new file mode 100644 index 000000000000..6cd91970fbad --- /dev/null +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. 
+ */ +#include "kvm_util.h" +#include "../kvm_util_internal.h" + +static vm_vaddr_t *ucall_exit_mmio_addr; + +static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) +{ + if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) + return false; + + virt_pg_map(vm, gpa, gpa, 0); + + ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; + sync_global_to_guest(vm, ucall_exit_mmio_addr); + + return true; +} + +void ucall_init(struct kvm_vm *vm, void *arg) +{ + vm_paddr_t gpa, start, end, step, offset; + unsigned int bits; + bool ret; + + if (arg) { + gpa = (vm_paddr_t)arg; + ret = ucall_mmio_init(vm, gpa); + TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); + return; + } + + /* + * Find an address within the allowed physical and virtual address + * spaces, that does _not_ have a KVM memory region associated with + * it. Identity mapping an address like this allows the guest to + * access it, but as KVM doesn't know what to do with it, it + * will assume it's something userspace handles and exit with + * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. + * Here we start with a guess that the addresses around 5/8th + * of the allowed space are unmapped and then work both down and + * up from there in 1/16th allowed space sized steps. + * + * Note, we need to use VA-bits - 1 when calculating the allowed + * virtual address space for an identity mapping because the upper + * half of the virtual address space is the two's complement of the + * lower and won't match physical addresses. + */ + bits = vm->va_bits - 1; + bits = vm->pa_bits < bits ? 
vm->pa_bits : bits; + end = 1ul << bits; + start = end * 5 / 8; + step = end / 16; + for (offset = 0; offset < end - start; offset += step) { + if (ucall_mmio_init(vm, start - offset)) + return; + if (ucall_mmio_init(vm, start + offset)) + return; + } + TEST_ASSERT(false, "Can't find a ucall mmio address"); +} + +void ucall_uninit(struct kvm_vm *vm) +{ + ucall_exit_mmio_addr = 0; + sync_global_to_guest(vm, ucall_exit_mmio_addr); +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct ucall ucall = {}; + + if (run->exit_reason == KVM_EXIT_MMIO && + run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { + vm_vaddr_t gva; + + TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, + "Unexpected ucall exit mmio address access"); + memcpy(&gva, run->mmio.data, sizeof(gva)); + memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + } + + return ucall.cmd; +} diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c index 4911fc77d0f6..d1cf9f6e0e6b 100644 --- a/tools/testing/selftests/kvm/lib/assert.c +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -55,7 +55,7 @@ static void test_dump_stack(void) #pragma GCC diagnostic pop } -static pid_t gettid(void) +static pid_t _gettid(void) { return syscall(SYS_gettid); } @@ -72,7 +72,7 @@ test_assert(bool exp, const char *exp_str, fprintf(stderr, "==== Test Assertion Failure ====\n" " %s:%u: %s\n" " pid=%d tid=%d - %s\n", - file, line, exp_str, getpid(), gettid(), + file, line, exp_str, getpid(), 
_gettid(), strerror(errno)); test_dump_stack(); if (fmt) { diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 6e49bb039376..a6dd0401eb50 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -8,6 +8,7 @@ #include "test_util.h" #include "kvm_util.h" #include "kvm_util_internal.h" +#include "processor.h" #include <assert.h> #include <sys/mman.h> @@ -84,7 +85,7 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } -static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) +static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); if (vm->kvm_fd < 0) @@ -95,18 +96,19 @@ static void vm_open(struct kvm_vm *vm, int perm, unsigned long type) exit(KSFT_SKIP); } - vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, type); + vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type); TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " "rc: %i errno: %i", vm->fd, errno); } const char * const vm_guest_mode_string[] = { - "PA-bits:52, VA-bits:48, 4K pages", - "PA-bits:52, VA-bits:48, 64K pages", - "PA-bits:48, VA-bits:48, 4K pages", - "PA-bits:48, VA-bits:48, 64K pages", - "PA-bits:40, VA-bits:48, 4K pages", - "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:52, VA-bits:48, 4K pages", + "PA-bits:52, VA-bits:48, 64K pages", + "PA-bits:48, VA-bits:48, 4K pages", + "PA-bits:48, VA-bits:48, 64K pages", + "PA-bits:40, VA-bits:48, 4K pages", + "PA-bits:40, VA-bits:48, 64K pages", + "PA-bits:ANY, VA-bits:48, 4K pages", }; _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, "Missing new mode strings?"); @@ -130,17 +132,17 @@ _Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES, * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). 
*/ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, - int perm, unsigned long type) +struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; + DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); vm->mode = mode; - vm->type = type; - vm_open(vm, perm, type); + vm->type = 0; /* Setup mode specific traits. */ switch (vm->mode) { @@ -186,10 +188,32 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, vm->page_size = 0x10000; vm->page_shift = 16; break; + case VM_MODE_PXXV48_4K: +#ifdef __x86_64__ + kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits); + TEST_ASSERT(vm->va_bits == 48, "Linear address width " + "(%d bits) not supported", vm->va_bits); + vm->pgtable_levels = 4; + vm->page_size = 0x1000; + vm->page_shift = 12; + DEBUG("Guest physical address width detected: %d\n", + vm->pa_bits); +#else + TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on " + "non-x86 platforms"); +#endif + break; default: TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); } +#ifdef __aarch64__ + if (vm->pa_bits != 40) + vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); +#endif + + vm_open(vm, perm); + /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); sparsebit_set_num(vm->vpages_valid, @@ -212,7 +236,7 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { - return _vm_create(mode, phy_pages, perm, 0); + return _vm_create(mode, phy_pages, perm); } /* @@ -232,7 +256,7 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) { struct userspace_mem_region *region; - vm_open(vmp, perm, vmp->type); + vm_open(vmp, perm); if (vmp->has_irqchip) vm_create_irqchip(vmp); @@ -681,7 +705,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, * on error (e.g. 
currently no memory region using memslot as a KVM * memory slot ID). */ -static struct userspace_mem_region * +struct userspace_mem_region * memslot2region(struct kvm_vm *vm, uint32_t memslot) { struct userspace_mem_region *region; @@ -1349,6 +1373,42 @@ int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); } +void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu); + TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + +void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu); + TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + +void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg); + TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + +void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg) +{ + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg); + TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)", + ret, errno, strerror(errno)); +} + /* * VCPU Ioctl * @@ -1628,3 +1688,18 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm) return val == 'Y'; } + +unsigned int vm_get_page_size(struct kvm_vm *vm) +{ + return vm->page_size; +} + +unsigned int vm_get_page_shift(struct kvm_vm *vm) +{ + return vm->page_shift; +} + +unsigned int vm_get_max_gfn(struct kvm_vm *vm) +{ + return vm->max_gfn; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index f36262e0f655..ac50c42750cf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ 
b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -68,4 +68,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + #endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c new file mode 100644 index 000000000000..fd589dc9bfab --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2019 Red Hat, Inc. + */ +#include "kvm_util.h" + +void ucall_init(struct kvm_vm *vm, void *arg) +{ +} + +void ucall_uninit(struct kvm_vm *vm) +{ +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? 
nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ + asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory"); +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct ucall ucall = {}; + + if (run->exit_reason == KVM_EXIT_S390_SIEIC && + run->s390_sieic.icptcode == 4 && + (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ + (run->s390_sieic.ipb >> 16) == 0x501) { + int reg = run->s390_sieic.ipa & 0xf; + + memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]), + sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + } + + return ucall.cmd; +} diff --git a/tools/testing/selftests/kvm/lib/ucall.c b/tools/testing/selftests/kvm/lib/ucall.c deleted file mode 100644 index dd9a66700f96..000000000000 --- a/tools/testing/selftests/kvm/lib/ucall.c +++ /dev/null @@ -1,157 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ucall support. A ucall is a "hypercall to userspace". - * - * Copyright (C) 2018, Red Hat, Inc. 
- */ -#include "kvm_util.h" -#include "kvm_util_internal.h" - -#define UCALL_PIO_PORT ((uint16_t)0x1000) - -static ucall_type_t ucall_type; -static vm_vaddr_t *ucall_exit_mmio_addr; - -static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa) -{ - if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1)) - return false; - - virt_pg_map(vm, gpa, gpa, 0); - - ucall_exit_mmio_addr = (vm_vaddr_t *)gpa; - sync_global_to_guest(vm, ucall_exit_mmio_addr); - - return true; -} - -void ucall_init(struct kvm_vm *vm, ucall_type_t type, void *arg) -{ - ucall_type = type; - sync_global_to_guest(vm, ucall_type); - - if (type == UCALL_PIO) - return; - - if (type == UCALL_MMIO) { - vm_paddr_t gpa, start, end, step, offset; - unsigned bits; - bool ret; - - if (arg) { - gpa = (vm_paddr_t)arg; - ret = ucall_mmio_init(vm, gpa); - TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa); - return; - } - - /* - * Find an address within the allowed physical and virtual address - * spaces, that does _not_ have a KVM memory region associated with - * it. Identity mapping an address like this allows the guest to - * access it, but as KVM doesn't know what to do with it, it - * will assume it's something userspace handles and exit with - * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64. - * Here we start with a guess that the addresses around 5/8th - * of the allowed space are unmapped and then work both down and - * up from there in 1/16th allowed space sized steps. - * - * Note, we need to use VA-bits - 1 when calculating the allowed - * virtual address space for an identity mapping because the upper - * half of the virtual address space is the two's complement of the - * lower and won't match physical addresses. - */ - bits = vm->va_bits - 1; - bits = vm->pa_bits < bits ? 
vm->pa_bits : bits; - end = 1ul << bits; - start = end * 5 / 8; - step = end / 16; - for (offset = 0; offset < end - start; offset += step) { - if (ucall_mmio_init(vm, start - offset)) - return; - if (ucall_mmio_init(vm, start + offset)) - return; - } - TEST_ASSERT(false, "Can't find a ucall mmio address"); - } -} - -void ucall_uninit(struct kvm_vm *vm) -{ - ucall_type = 0; - sync_global_to_guest(vm, ucall_type); - ucall_exit_mmio_addr = 0; - sync_global_to_guest(vm, ucall_exit_mmio_addr); -} - -static void ucall_pio_exit(struct ucall *uc) -{ -#ifdef __x86_64__ - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax"); -#endif -} - -static void ucall_mmio_exit(struct ucall *uc) -{ - *ucall_exit_mmio_addr = (vm_vaddr_t)uc; -} - -void ucall(uint64_t cmd, int nargs, ...) -{ - struct ucall uc = { - .cmd = cmd, - }; - va_list va; - int i; - - nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; - - va_start(va, nargs); - for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); - va_end(va); - - switch (ucall_type) { - case UCALL_PIO: - ucall_pio_exit(&uc); - break; - case UCALL_MMIO: - ucall_mmio_exit(&uc); - break; - }; -} - -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) -{ - struct kvm_run *run = vcpu_state(vm, vcpu_id); - struct ucall ucall = {}; - bool got_ucall = false; - -#ifdef __x86_64__ - if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO && - run->io.port == UCALL_PIO_PORT) { - struct kvm_regs regs; - vcpu_regs_get(vm, vcpu_id, &regs); - memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall)); - got_ucall = true; - } -#endif - if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO && - run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { - vm_vaddr_t gva; - TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, - "Unexpected ucall exit mmio address access"); - memcpy(&gva, run->mmio.data, sizeof(gva)); - memcpy(&ucall, addr_gva2hva(vm, gva), 
sizeof(ucall)); - got_ucall = true; - } - - if (got_ucall) { - vcpu_run_complete_io(vm, vcpu_id); - if (uc) - memcpy(uc, &ucall, sizeof(ucall)); - } - - return ucall.cmd; -} diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 6cb34a0fa200..683d3bdb8f6a 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -228,7 +228,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) { - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); /* If needed, create page map l4 table. */ @@ -261,7 +261,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint16_t index[4]; struct pageMapL4Entry *pml4e; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -547,7 +547,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) struct pageDirectoryEntry *pde; struct pageTableEntry *pte; - TEST_ASSERT(vm->mode == VM_MODE_P52V48_4K, "Attempt to use " + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); index[0] = (gva >> 12) & 0x1ffu; @@ -621,7 +621,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot); switch (vm->mode) { - case VM_MODE_P52V48_4K: + case VM_MODE_PXXV48_4K: sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); @@ -869,7 +869,7 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t 
msr_index) return buffer.entry.data; } -/* VCPU Set MSR +/* _VCPU Set MSR * * Input Args: * vm - Virtual Machine @@ -879,12 +879,12 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) * * Output Args: None * - * Return: On success, nothing. On failure a TEST_ASSERT is produced. + * Return: The result of KVM_SET_MSRS. * - * Set value of MSR for VCPU. + * Sets the value of an MSR for the given VCPU. */ -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value) +int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); struct { @@ -899,6 +899,29 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, buffer.entry.index = msr_index; buffer.entry.data = msr_value; r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); + return r; +} + +/* VCPU Set MSR + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * msr_index - Index of MSR + * msr_value - New value of MSR + * + * Output Args: None + * + * Return: On success, nothing. On failure a TEST_ASSERT is produced. + * + * Set value of MSR for VCPU. 
+ */ +void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, + uint64_t msr_value) +{ + int r; + + r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value); TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" " rc: %i errno: %i", r, errno); } @@ -1000,19 +1023,45 @@ struct kvm_x86_state { struct kvm_msrs msrs; }; -static int kvm_get_num_msrs(struct kvm_vm *vm) +static int kvm_get_num_msrs_fd(int kvm_fd) { struct kvm_msr_list nmsrs; int r; nmsrs.nmsrs = 0; - r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", r); return nmsrs.nmsrs; } +static int kvm_get_num_msrs(struct kvm_vm *vm) +{ + return kvm_get_num_msrs_fd(vm->kvm_fd); +} + +struct kvm_msr_list *kvm_get_msr_index_list(void) +{ + struct kvm_msr_list *list; + int nmsrs, r, kvm_fd; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + nmsrs = kvm_get_num_msrs_fd(kvm_fd); + list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); + list->nmsrs = nmsrs; + r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + close(kvm_fd); + + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); + + return list; +} + struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); @@ -1060,9 +1109,11 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", r); - r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", - r); + if (kvm_check_cap(KVM_CAP_XCRS)) { + r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", + r); + } r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", 
@@ -1083,7 +1134,7 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)", + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); @@ -1103,9 +1154,11 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", r); - r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", - r); + if (kvm_check_cap(KVM_CAP_XCRS)) { + r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", + r); + } r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", @@ -1153,3 +1206,30 @@ bool is_intel_cpu(void) chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } + +uint32_t kvm_get_cpuid_max_basic(void) +{ + return kvm_get_supported_cpuid_entry(0)->eax; +} + +uint32_t kvm_get_cpuid_max_extended(void) +{ + return kvm_get_supported_cpuid_entry(0x80000000)->eax; +} + +void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) +{ + struct kvm_cpuid_entry2 *entry; + bool pae; + + /* SDM 4.1.4 */ + if (kvm_get_cpuid_max_extended() < 0x80000008) { + pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6); + *pa_bits = pae ? 
36 : 32; + *va_bits = 32; + } else { + entry = kvm_get_supported_cpuid_entry(0x80000008); + *pa_bits = entry->eax & 0xff; + *va_bits = (entry->eax >> 8) & 0xff; + } +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c new file mode 100644 index 000000000000..6e05a8fc3fe0 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tools/testing/selftests/kvm/lib/x86_64/svm.c + * Helpers used for nested SVM testing + * Largely inspired from KVM unit test svm.c + * + * Copyright (C) 2020, Red Hat, Inc. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "../kvm_util_internal.h" +#include "processor.h" +#include "svm_util.h" + +struct gpr64_regs guest_regs; +u64 rflags; + +/* Allocate memory regions for nested SVM tests. + * + * Input Args: + * vm - The VM to allocate guest-virtual addresses in. + * + * Output Args: + * p_svm_gva - The guest virtual address for the struct svm_test_data. + * + * Return: + * Pointer to structure with the addresses of the SVM areas. 
+ */ +struct svm_test_data * +vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva) +{ + vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + struct svm_test_data *svm = addr_gva2hva(vm, svm_gva); + + svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb); + svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb); + + svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area); + svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area); + + *p_svm_gva = svm_gva; + return svm; +} + +static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, + u64 base, u32 limit, u32 attr) +{ + seg->selector = selector; + seg->attrib = attr; + seg->limit = limit; + seg->base = base; +} + +void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) +{ + struct vmcb *vmcb = svm->vmcb; + uint64_t vmcb_gpa = svm->vmcb_gpa; + struct vmcb_save_area *save = &vmcb->save; + struct vmcb_control_area *ctrl = &vmcb->control; + u32 data_seg_attr = 3 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_DB_MASK | SVM_SELECTOR_G_MASK; + u32 code_seg_attr = 9 | SVM_SELECTOR_S_MASK | SVM_SELECTOR_P_MASK + | SVM_SELECTOR_L_MASK | SVM_SELECTOR_G_MASK; + uint64_t efer; + + efer = rdmsr(MSR_EFER); + wrmsr(MSR_EFER, efer | EFER_SVME); + wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); + + memset(vmcb, 0, sizeof(*vmcb)); + asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory"); + vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); + vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->ds, get_ds(), 0, -1U, data_seg_attr); + vmcb_set_seg(&save->gdtr, 0, get_gdt().address, get_gdt().size, 0); + vmcb_set_seg(&save->idtr, 0, get_idt().address, get_idt().size, 0); + + ctrl->asid = 1; + save->cpl = 0; + 
save->efer = rdmsr(MSR_EFER); + asm volatile ("mov %%cr4, %0" : "=r"(save->cr4) : : "memory"); + asm volatile ("mov %%cr3, %0" : "=r"(save->cr3) : : "memory"); + asm volatile ("mov %%cr0, %0" : "=r"(save->cr0) : : "memory"); + asm volatile ("mov %%dr7, %0" : "=r"(save->dr7) : : "memory"); + asm volatile ("mov %%dr6, %0" : "=r"(save->dr6) : : "memory"); + asm volatile ("mov %%cr2, %0" : "=r"(save->cr2) : : "memory"); + save->g_pat = rdmsr(MSR_IA32_CR_PAT); + save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR); + ctrl->intercept = (1ULL << INTERCEPT_VMRUN) | + (1ULL << INTERCEPT_VMMCALL); + + vmcb->save.rip = (u64)guest_rip; + vmcb->save.rsp = (u64)guest_rsp; + guest_regs.rdi = (u64)svm; +} + +/* + * save/restore 64-bit general registers except rax, rip, rsp + * which are directly handed through the VMCB guest processor state + */ +#define SAVE_GPR_C \ + "xchg %%rbx, guest_regs+0x20\n\t" \ + "xchg %%rcx, guest_regs+0x10\n\t" \ + "xchg %%rdx, guest_regs+0x18\n\t" \ + "xchg %%rbp, guest_regs+0x30\n\t" \ + "xchg %%rsi, guest_regs+0x38\n\t" \ + "xchg %%rdi, guest_regs+0x40\n\t" \ + "xchg %%r8, guest_regs+0x48\n\t" \ + "xchg %%r9, guest_regs+0x50\n\t" \ + "xchg %%r10, guest_regs+0x58\n\t" \ + "xchg %%r11, guest_regs+0x60\n\t" \ + "xchg %%r12, guest_regs+0x68\n\t" \ + "xchg %%r13, guest_regs+0x70\n\t" \ + "xchg %%r14, guest_regs+0x78\n\t" \ + "xchg %%r15, guest_regs+0x80\n\t" + +#define LOAD_GPR_C SAVE_GPR_C + +/* + * selftests do not use interrupts so we dropped clgi/sti/cli/stgi + * for now. registers involved in LOAD/SAVE_GPR_C are eventually + * unmodified so they do not need to be in the clobber list. 
+ */ +void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) +{ + asm volatile ( + "vmload\n\t" + "mov rflags, %%r15\n\t" // rflags + "mov %%r15, 0x170(%[vmcb])\n\t" + "mov guest_regs, %%r15\n\t" // rax + "mov %%r15, 0x1f8(%[vmcb])\n\t" + LOAD_GPR_C + "vmrun\n\t" + SAVE_GPR_C + "mov 0x170(%[vmcb]), %%r15\n\t" // rflags + "mov %%r15, rflags\n\t" + "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax + "mov %%r15, guest_regs\n\t" + "vmsave\n\t" + : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa) + : "r15", "memory"); +} + +void nested_svm_check_supported(void) +{ + struct kvm_cpuid_entry2 *entry = + kvm_get_supported_cpuid_entry(0x80000001); + + if (!(entry->ecx & CPUID_SVM)) { + fprintf(stderr, "nested SVM not enabled, skipping test\n"); + exit(KSFT_SKIP); + } +} + diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c new file mode 100644 index 000000000000..da4d89ad5419 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucall support. A ucall is a "hypercall to userspace". + * + * Copyright (C) 2018, Red Hat, Inc. + */ +#include "kvm_util.h" + +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +void ucall_init(struct kvm_vm *vm, void *arg) +{ +} + +void ucall_uninit(struct kvm_vm *vm) +{ +} + +void ucall(uint64_t cmd, int nargs, ...) +{ + struct ucall uc = { + .cmd = cmd, + }; + va_list va; + int i; + + nargs = nargs <= UCALL_MAX_ARGS ? 
nargs : UCALL_MAX_ARGS; + + va_start(va, nargs); + for (i = 0; i < nargs; ++i) + uc.args[i] = va_arg(va, uint64_t); + va_end(va); + + asm volatile("in %[port], %%al" + : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); +} + +uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +{ + struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct ucall ucall = {}; + + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { + struct kvm_regs regs; + + vcpu_regs_get(vm, vcpu_id, &regs); + memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), + sizeof(ucall)); + + vcpu_run_complete_io(vm, vcpu_id); + if (uc) + memcpy(uc, &ucall, sizeof(ucall)); + } + + return ucall.cmd; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 204f847bd065..7aaa99ca4dbc 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -7,11 +7,59 @@ #include "test_util.h" #include "kvm_util.h" +#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" +#define PAGE_SHIFT_4K 12 + +#define KVM_EPT_PAGE_TABLE_MIN_PADDR 0x1c0000 + bool enable_evmcs; +struct eptPageTableEntry { + uint64_t readable:1; + uint64_t writable:1; + uint64_t executable:1; + uint64_t memory_type:3; + uint64_t ignore_pat:1; + uint64_t page_size:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t ignored_11_10:2; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t suppress_ve:1; +}; + +struct eptPageTablePointer { + uint64_t memory_type:3; + uint64_t page_walk_length:3; + uint64_t ad_enabled:1; + uint64_t reserved_11_07:5; + uint64_t address:40; + uint64_t reserved_63_52:12; +}; +int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) +{ + uint16_t evmcs_ver; + + struct kvm_enable_cap enable_evmcs_cap = { + .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + .args[0] = (unsigned long)&evmcs_ver + }; + + vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, 
&enable_evmcs_cap); + + /* KVM should return supported EVMCS version range */ + TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && + (evmcs_ver & 0xff) > 0, + "Incorrect EVMCS version range: %x:%x\n", + evmcs_ver & 0xff, evmcs_ver >> 8); + + return evmcs_ver; +} + /* Allocate memory regions for nested VMX tests. * * Input Args: @@ -109,11 +157,11 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx) * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON * outside of SMX causes a #GP. */ - required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; - required |= FEATURE_CONTROL_LOCKED; - feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + required = FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; + required |= FEAT_CTL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEAT_CTL); if ((feature_control & required) != required) - wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); + wrmsr(MSR_IA32_FEAT_CTL, feature_control | required); /* Enter VMX root operation. */ *(uint32_t *)(vmx->vmxon) = vmcs_revision(); @@ -154,15 +202,35 @@ bool load_vmcs(struct vmx_pages *vmx) */ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) { + uint32_t sec_exec_ctl = 0; + vmwrite(VIRTUAL_PROCESSOR_ID, 0); vmwrite(POSTED_INTR_NV, 0); vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS)); - if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0)) + + if (vmx->eptp_gpa) { + uint64_t ept_paddr; + struct eptPageTablePointer eptp = { + .memory_type = VMX_BASIC_MEM_TYPE_WB, + .page_walk_length = 3, /* + 1 */ + .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), + .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, + }; + + memcpy(&ept_paddr, &eptp, sizeof(ept_paddr)); + vmwrite(EPT_POINTER, ept_paddr); + sec_exec_ctl |= SECONDARY_EXEC_ENABLE_EPT; + } + + if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl)) vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); - else + else { 
vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS)); + GUEST_ASSERT(!sec_exec_ctl); + } + vmwrite(EXCEPTION_BITMAP, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ @@ -220,9 +288,9 @@ static inline void init_vmcs_host_state(void) vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); vmwrite(HOST_TR_BASE, - get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); - vmwrite(HOST_GDTR_BASE, get_gdt_base()); - vmwrite(HOST_IDTR_BASE, get_idt_base()); + get_desc64_base((struct desc64 *)(get_gdt().address + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt().address); + vmwrite(HOST_IDTR_BASE, get_idt().address); vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); } @@ -307,3 +375,162 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_host_state(); init_vmcs_guest_state(guest_rip, guest_rsp); } + +void nested_vmx_check_supported(void) +{ + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } +} + +void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot) +{ + uint16_t index[4]; + struct eptPageTableEntry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((nested_paddr % vm->page_size) == 0, + "Nested physical address not on page boundary,\n" + " nested_paddr: 0x%lx vm->page_size: 0x%x", + nested_paddr, vm->page_size); + TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + 
TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (nested_paddr >> 12) & 0x1ffu; + index[1] = (nested_paddr >> 21) & 0x1ffu; + index[2] = (nested_paddr >> 30) & 0x1ffu; + index[3] = (nested_paddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = vmx->eptp_hva; + if (!pml4e[index[3]].readable) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].readable = true; + pml4e[index[3]].executable = true; + } + + /* Allocate page directory table if not present. */ + struct eptPageTableEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].readable) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].readable = true; + pdpe[index[2]].executable = true; + } + + /* Allocate page table if not present. */ + struct eptPageTableEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].readable) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].readable = true; + pde[index[1]].executable = true; + } + + /* Fill in page table entry. 
*/ + struct eptPageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].readable = true; + pte[index[0]].executable = true; + + /* + * For now mark these as accessed and dirty because the only + * testcase we have needs that. Can be reconsidered later. + */ + pte[index[0]].accessed = true; + pte[index[0]].dirty = true; +} + +/* + * Map a range of EPT guest physical addresses to the VM's physical address + * + * Input Args: + * vm - Virtual Machine + * nested_paddr - Nested guest physical address to map + * paddr - VM Physical Address + * size - The size of the range to map + * eptp_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a nested guest translation for the + * page range starting at nested_paddr to the page range starting at paddr. + */ +void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, + uint64_t nested_paddr, uint64_t paddr, uint64_t size, + uint32_t eptp_memslot) +{ + size_t page_size = vm->page_size; + size_t npages = size / page_size; + + TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); + TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); + + while (npages--) { + nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot); + nested_paddr += page_size; + paddr += page_size; + } +} + +/* Prepare an identity extended page table that maps all the + * physical pages in VM. 
+ */ +void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t memslot, uint32_t eptp_memslot) +{ + sparsebit_idx_t i, last; + struct userspace_mem_region *region = + memslot2region(vm, memslot); + + i = (region->region.guest_phys_addr >> vm->page_shift) - 1; + last = i + (region->region.memory_size >> vm->page_shift); + for (;;) { + i = sparsebit_next_clear(region->unused_phy_pages, i); + if (i > last) + break; + + nested_map(vmx, vm, + (uint64_t)i << vm->page_shift, + (uint64_t)i << vm->page_shift, + 1 << vm->page_shift, + eptp_memslot); + } +} + +void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0); + vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); + vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); +} diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c new file mode 100644 index 000000000000..9edaa9a134ce --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test for s390x KVM_S390_MEM_OP + * + * Copyright (C) 2019, Red Hat, Inc. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 1 + +static uint8_t mem1[65536]; +static uint8_t mem2[65536]; + +static void guest_code(void) +{ + int i; + + for (;;) { + for (i = 0; i < sizeof(mem2); i++) + mem2[i] = mem1[i]; + GUEST_SYNC(0); + } +} + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_s390_mem_op ksmo; + int rv, i, maxsize; + + setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + + maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP); + if (!maxsize) { + fprintf(stderr, "CAP_S390_MEM_OP not supported -> skip test\n"); + exit(KSFT_SKIP); + } + if (maxsize > sizeof(mem1)) + maxsize = sizeof(mem1); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + run = vcpu_state(vm, VCPU_ID); + + for (i = 0; i < sizeof(mem1); i++) + mem1[i] = i * i + i; + + /* Set the first array */ + ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1); + ksmo.flags = 0; + ksmo.size = maxsize; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; + vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + + /* Let the guest code copy the first array to the second */ + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, + "Unexpected exit reason: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + memset(mem2, 0xaa, sizeof(mem2)); + + /* Get the second array */ + ksmo.gaddr = (uintptr_t)mem2; + ksmo.flags = 0; + ksmo.size = maxsize; + ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; + ksmo.buf = (uintptr_t)mem2; + ksmo.ar = 0; + vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + + TEST_ASSERT(!memcmp(mem1, mem2, maxsize), + "Memory contents do not match!"); + + /* Check error conditions - first bad size: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; + ksmo.size = -1; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = 
(uintptr_t)mem1; + ksmo.ar = 0; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); + + /* Zero size: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; + ksmo.size = 0; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), + "ioctl allows 0 as size"); + + /* Bad flags: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = -1; + ksmo.size = maxsize; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); + + /* Bad operation: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; + ksmo.size = maxsize; + ksmo.op = -1; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations"); + + /* Bad guest address: */ + ksmo.gaddr = ~0xfffUL; + ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY; + ksmo.size = maxsize; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 0; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); + + /* Bad host address: */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; + ksmo.size = maxsize; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = 0; + ksmo.ar = 0; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EFAULT, + "ioctl does not report bad host memory address"); + + /* Bad access register: */ + run->psw_mask &= ~(3UL << (63 - 17)); + run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */ + vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */ + ksmo.gaddr = (uintptr_t)mem1; + ksmo.flags = 0; + 
ksmo.size = maxsize; + ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; + ksmo.buf = (uintptr_t)mem1; + ksmo.ar = 17; + rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo); + TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15"); + run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */ + vcpu_run(vm, VCPU_ID); /* Run to sync new state */ + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c new file mode 100644 index 000000000000..1485bc6c8999 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test for s390x CPU resets + * + * Copyright (C) 2020, IBM + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" + +#define VCPU_ID 3 +#define LOCAL_IRQS 32 + +struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS]; + +struct kvm_vm *vm; +struct kvm_run *run; +struct kvm_sync_regs *regs; +static uint64_t regs_null[16]; + +static uint64_t crs[16] = { 0x40000ULL, + 0x42000ULL, + 0, 0, 0, 0, 0, + 0x43000ULL, + 0, 0, 0, 0, 0, + 0x44000ULL, + 0, 0 +}; + +static void guest_code_initial(void) +{ + /* Round toward 0 */ + uint32_t fpc = 0x11; + + /* Dirty registers */ + asm volatile ( + " lctlg 0,15,%0\n" + " sfpc %1\n" + : : "Q" (crs), "d" (fpc)); + GUEST_SYNC(0); +} + +static void test_one_reg(uint64_t id, uint64_t value) +{ + struct kvm_one_reg reg; + uint64_t eval_reg; + + reg.addr = (uintptr_t)&eval_reg; + reg.id = id; + vcpu_get_reg(vm, VCPU_ID, ®); + TEST_ASSERT(eval_reg == value, "value == %s", value); +} + +static void assert_noirq(void) +{ + struct kvm_s390_irq_state irq_state; + int irqs; + + irq_state.len = sizeof(buf); + irq_state.buf = (unsigned long)buf; + irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state); + /* + * irqs contains the number of retrieved interrupts. 
Any interrupt + * (notably, the emergency call interrupt we have injected) should + * be cleared by the resets, so this should be 0. + */ + TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno); + TEST_ASSERT(!irqs, "IRQ pending"); +} + +static void assert_clear(void) +{ + struct kvm_sregs sregs; + struct kvm_regs regs; + struct kvm_fpu fpu; + + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0"); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0"); + + vcpu_fpu_get(vm, VCPU_ID, &fpu); + TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0"); +} + +static void assert_initial(void) +{ + struct kvm_sregs sregs; + struct kvm_fpu fpu; + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0"); + TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000"); + TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12), + "cr1-13 == 0"); + TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0"); + + vcpu_fpu_get(vm, VCPU_ID, &fpu); + TEST_ASSERT(!fpu.fpc, "fpc == 0"); + + test_one_reg(KVM_REG_S390_GBEA, 1); + test_one_reg(KVM_REG_S390_PP, 0); + test_one_reg(KVM_REG_S390_TODPR, 0); + test_one_reg(KVM_REG_S390_CPU_TIMER, 0); + test_one_reg(KVM_REG_S390_CLOCK_COMP, 0); +} + +static void assert_normal(void) +{ + test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); + assert_noirq(); +} + +static void inject_irq(int cpu_id) +{ + struct kvm_s390_irq_state irq_state; + struct kvm_s390_irq *irq = &buf[0]; + int irqs; + + /* Inject IRQ */ + irq_state.len = sizeof(struct kvm_s390_irq); + irq_state.buf = (unsigned long)buf; + irq->type = KVM_S390_INT_EMERGENCY; + irq->u.emerg.code = cpu_id; + irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state); + TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); +} + +static void test_normal(void) +{ + 
printf("Testing normal reset\n"); + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, guest_code_initial); + run = vcpu_state(vm, VCPU_ID); + regs = &run->s.regs; + + vcpu_run(vm, VCPU_ID); + + inject_irq(VCPU_ID); + + vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0); + assert_normal(); + kvm_vm_free(vm); +} + +static void test_initial(void) +{ + printf("Testing initial reset\n"); + vm = vm_create_default(VCPU_ID, 0, guest_code_initial); + run = vcpu_state(vm, VCPU_ID); + regs = &run->s.regs; + + vcpu_run(vm, VCPU_ID); + + inject_irq(VCPU_ID); + + vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0); + assert_normal(); + assert_initial(); + kvm_vm_free(vm); +} + +static void test_clear(void) +{ + printf("Testing clear reset\n"); + vm = vm_create_default(VCPU_ID, 0, guest_code_initial); + run = vcpu_state(vm, VCPU_ID); + regs = &run->s.regs; + + vcpu_run(vm, VCPU_ID); + + inject_irq(VCPU_ID); + + vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0); + assert_normal(); + assert_initial(); + assert_clear(); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ + + test_initial(); + if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) { + test_normal(); + test_clear(); + } + return 0; +} diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index e85ff0d69548..b705637ca14b 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -25,10 +25,15 @@ static void guest_code(void) { - for (;;) { - asm volatile ("diag 0,0,0x501"); - asm volatile ("ahi 11,1"); - } + /* + * We embed diag 501 here instead of doing a ucall to avoid that + * the compiler has messed with r11 at the time of the ucall. 
+ */ + asm volatile ( + "0: diag 0,0,0x501\n" + " ahi 11,1\n" + " j 0b\n" + ); } #define REG_COMPARE(reg) \ @@ -83,6 +88,36 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); + /* Request reading invalid register set from VCPU. */ + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + /* Request setting invalid register set into VCPU. */ + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + /* Request and verify all valid register sets. 
*/ run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index f95c08343b48..92915e6408e7 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -79,11 +79,6 @@ int main(int argc, char *argv[]) struct kvm_x86_state *state; struct ucall uc; int stage; - uint16_t evmcs_ver; - struct kvm_enable_cap enable_evmcs_cap = { - .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, - .args[0] = (unsigned long)&evmcs_ver - }; /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); @@ -96,13 +91,7 @@ int main(int argc, char *argv[]) exit(KSFT_SKIP); } - vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); - - /* KVM should return supported EVMCS version range */ - TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && - (evmcs_ver & 0xff) > 0, - "Incorrect EVMCS version range: %x:%x\n", - evmcs_ver & 0xff, evmcs_ver >> 8); + vcpu_enable_evmcs(vm, VCPU_ID); run = vcpu_state(vm, VCPU_ID); @@ -146,7 +135,7 @@ int main(int argc, char *argv[]) kvm_vm_restart(vm, O_RDWR); vm_vcpu_add(vm, VCPU_ID); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); + vcpu_enable_evmcs(vm, VCPU_ID); vcpu_load_state(vm, VCPU_ID, state); run = vcpu_state(vm, VCPU_ID); free(state); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index f72b3043db0e..443a2b54645b 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -18,6 +18,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "vmx.h" #define VCPU_ID 0 @@ -25,6 +26,25 @@ static void guest_code(void) { } +static int smt_possible(void) +{ + char buf[16]; + FILE *f; + bool res = 1; + + f = fopen("/sys/devices/system/cpu/smt/control", "r"); + if 
(f) { + if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) { + if (!strncmp(buf, "forceoff", 8) || + !strncmp(buf, "notsupported", 12)) + res = 0; + } + fclose(f); + } + + return res; +} + static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, int evmcs_enabled) { @@ -58,6 +78,14 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, TEST_ASSERT(!entry->padding[0] && !entry->padding[1] && !entry->padding[2], "padding should be zero"); + if (entry->function == 0x40000004) { + int nononarchcs = !!(entry->eax & (1UL << 18)); + + TEST_ASSERT(nononarchcs == !smt_possible(), + "NoNonArchitecturalCoreSharing bit" + " doesn't reflect SMT setting"); + } + /* * If needed for debug: * fprintf(stdout, @@ -106,12 +134,7 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; int rv; - uint16_t evmcs_ver; struct kvm_cpuid2 *hv_cpuid_entries; - struct kvm_enable_cap enable_evmcs_cap = { - .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, - .args[0] = (unsigned long)&evmcs_ver - }; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); @@ -136,14 +159,14 @@ int main(int argc, char *argv[]) free(hv_cpuid_entries); - rv = _vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap); - - if (rv) { + if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { fprintf(stderr, "Enlightened VMCS is unsupported, skip related test\n"); goto vm_free; } + vcpu_enable_evmcs(vm, VCPU_ID); + hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm); if (!hv_cpuid_entries) return 1; diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 40050e44ec0a..f9334bd3cce9 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -99,8 +99,8 @@ int main(int argc, char *argv[]) msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO); vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - 
test_msr_platform_info_disabled(vm); test_msr_platform_info_enabled(vm); + test_msr_platform_info_disabled(vm); vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c new file mode 100644 index 000000000000..e280f68f6365 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_vmcall_test + * + * Copyright (C) 2020, Red Hat, Inc. + * + * Nested SVM testing: VMCALL + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" + +#define VCPU_ID 5 + +static struct kvm_vm *vm; + +static void l2_guest_code(struct svm_test_data *svm) +{ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + /* Prepare for L2 execution. 
*/ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + run_guest(vmcb, svm->vmcb_gpa); + + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t svm_gva; + + nested_svm_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vm, VCPU_ID, 1, svm_gva); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s", + (const char *)uc.args[0]); + /* NOT REACHED */ + case UCALL_SYNC: + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, + "Unknown ucall 0x%x.", uc.cmd); + } + } +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 11c2a70a7b87..5c8224256294 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -22,18 +22,19 @@ #define VCPU_ID 5 +#define UCALL_PIO_PORT ((uint16_t)0x1000) + +/* + * ucall is embedded here to protect against compiler reshuffling registers + * before calling a function. In this test we only need to get KVM_EXIT_IO + * vmexit and preserve RBX, no additional information is needed. + */ void guest_code(void) { - /* - * use a callee-save register, otherwise the compiler - * saves it around the call to GUEST_SYNC. 
- */ - register u32 stage asm("rbx"); - for (;;) { - GUEST_SYNC(0); - stage++; - asm volatile ("" : : "r" (stage)); - } + asm volatile("1: in %[port], %%al\n" + "add $0x1, %%rbx\n" + "jmp 1b" + : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx"); } static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 3b0ffe01dacd..5dfb53546a26 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -53,12 +53,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c new file mode 100644 index 000000000000..a223a6401258 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging test + * + * Copyright (C) 2018, Red Hat, Inc. 
+ */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#define VCPU_ID 1 + +/* The memory slot index to track dirty pages */ +#define TEST_MEM_SLOT_INDEX 1 +#define TEST_MEM_SIZE 3 + +/* L1 guest test virtual memory offset */ +#define GUEST_TEST_MEM 0xc0000000 + +/* L2 guest test virtual memory offset */ +#define NESTED_TEST_MEM1 0xc0001000 +#define NESTED_TEST_MEM2 0xc0002000 + +static void l2_guest_code(void) +{ + *(volatile uint64_t *)NESTED_TEST_MEM1; + *(volatile uint64_t *)NESTED_TEST_MEM1 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + *(volatile uint64_t *)NESTED_TEST_MEM2 = 1; + GUEST_SYNC(true); + GUEST_SYNC(false); + + /* Exit to L1 and never come back. */ + vmcall(); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_SYNC(false); + GUEST_ASSERT(!vmlaunch()); + GUEST_SYNC(false); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_gva = 0; + struct vmx_pages *vmx; + unsigned long *bmap; + uint64_t *host_test_mem; + + struct kvm_vm *vm; + struct kvm_run *run; + struct ucall uc; + bool done = false; + + nested_vmx_check_supported(); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + run = vcpu_state(vm, VCPU_ID); + + /* Add an extra memory slot for 
testing dirty logging */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + GUEST_TEST_MEM, + TEST_MEM_SLOT_INDEX, + TEST_MEM_SIZE, + KVM_MEM_LOG_DIRTY_PAGES); + + /* + * Add an identity map for GVA range [0xc0000000, 0xc0002000). This + * affects both L1 and L2. However... + */ + virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, + TEST_MEM_SIZE * 4096, 0); + + /* + * ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to + * 0xc0000000. + * + * Note that prepare_eptp should be called only L1's GPA map is done, + * meaning after the last call to virt_map. + */ + prepare_eptp(vmx, vm, 0); + nested_map_memslot(vmx, vm, 0, 0); + nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0); + nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0); + + bmap = bitmap_alloc(TEST_MEM_SIZE); + host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM); + + while (!done) { + memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096); + _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + /* + * The nested guest wrote at offset 0x1000 in the memslot, but the + * dirty bitmap must be filled in according to L1 GPA, not L2. 
+ */ + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + if (uc.args[1]) { + TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n"); + TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n"); + } else { + TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n"); + } + + TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n"); + TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n"); + TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n"); + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); + } + } +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index ed7218d166da..9ef7fab39d48 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -25,24 +25,17 @@ #define VMCS12_REVISION 0x11e57ed0 #define VCPU_ID 5 +bool have_evmcs; + void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state) { - volatile struct kvm_run *run; - vcpu_nested_state_set(vm, VCPU_ID, state, false); - run = vcpu_state(vm, VCPU_ID); - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); } void test_nested_state_expect_errno(struct kvm_vm *vm, struct kvm_nested_state *state, int expected_errno) { - volatile struct kvm_run *run; int rv; rv = vcpu_nested_state_set(vm, VCPU_ID, state, true); @@ -50,12 +43,6 @@ void test_nested_state_expect_errno(struct kvm_vm *vm, "Expected %s (%d) from vcpu_nested_state_set but got 
rv: %i errno: %s (%d)", strerror(expected_errno), expected_errno, rv, strerror(errno), errno); - run = vcpu_state(vm, VCPU_ID); - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s),\n", - run->exit_reason, - exit_reason_str(run->exit_reason)); } void test_nested_state_expect_einval(struct kvm_vm *vm, @@ -90,8 +77,9 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size) { memset(state, 0, size); state->flags = KVM_STATE_NESTED_GUEST_MODE | - KVM_STATE_NESTED_RUN_PENDING | - KVM_STATE_NESTED_EVMCS; + KVM_STATE_NESTED_RUN_PENDING; + if (have_evmcs) + state->flags |= KVM_STATE_NESTED_EVMCS; state->format = 0; state->size = size; state->hdr.vmx.vmxon_pa = 0x1000; @@ -141,13 +129,19 @@ void test_vmx_nested_state(struct kvm_vm *vm) /* * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without * setting the nested state but flags other than eVMCS must be clear. + * The eVMCS flag can be set if the enlightened VMCS capability has + * been enabled. */ set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; test_nested_state_expect_einval(vm, state); - state->flags = KVM_STATE_NESTED_EVMCS; + state->flags &= KVM_STATE_NESTED_EVMCS; + if (have_evmcs) { + test_nested_state_expect_einval(vm, state); + vcpu_enable_evmcs(vm, VCPU_ID); + } test_nested_state(vm, state); /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ @@ -230,7 +224,8 @@ int main(int argc, char *argv[]) { struct kvm_vm *vm; struct kvm_nested_state state; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { printf("KVM_CAP_NESTED_STATE not available, skipping test\n"); @@ -241,10 +236,7 @@ int main(int argc, char *argv[]) * AMD currently does not implement set_nested_state, so for now we * just early out. 
*/ - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, 0); @@ -275,12 +267,7 @@ int main(int argc, char *argv[]) state.flags = KVM_STATE_NESTED_RUN_PENDING; test_nested_state_expect_einval(vm, &state); - /* - * TODO: When SVM support is added for KVM_SET_NESTED_STATE - * add tests here to support it like VMX. - */ - if (entry->ecx & CPUID_VMX) - test_vmx_nested_state(vm); + test_vmx_nested_state(vm); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index f36c10eba71e..69e482a95c47 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -98,7 +98,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); - control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); @@ -128,12 +128,8 @@ static void report(int64_t val) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & CPUID_VMX)) { - fprintf(stderr, "nested VMX not enabled, skipping test\n"); - exit(KSFT_SKIP); - } + nested_vmx_check_supported(); vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c new file mode 100644 index 000000000000..851ea81b9d9f --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -0,0 +1,76 @@ +// 
SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019, Google LLC. + * + * Tests for the IA32_XSS MSR. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "vmx.h" + +#define VCPU_ID 1 +#define MSR_BITS 64 + +#define X86_FEATURE_XSAVES (1<<3) + +bool is_supported_msr(u32 msr_index) +{ + struct kvm_msr_list *list; + bool found = false; + int i; + + list = kvm_get_msr_index_list(); + for (i = 0; i < list->nmsrs; ++i) { + if (list->indices[i] == msr_index) { + found = true; + break; + } + } + + free(list); + return found; +} + +int main(int argc, char *argv[]) +{ + struct kvm_cpuid_entry2 *entry; + bool xss_supported = false; + struct kvm_vm *vm; + uint64_t xss_val; + int i, r; + + /* Create VM */ + vm = vm_create_default(VCPU_ID, 0, 0); + + if (kvm_get_cpuid_max_basic() >= 0xd) { + entry = kvm_get_supported_cpuid_index(0xd, 1); + xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES); + } + if (!xss_supported) { + printf("IA32_XSS is not supported by the vCPU.\n"); + exit(KSFT_SKIP); + } + + xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS); + TEST_ASSERT(xss_val == 0, + "MSR_IA32_XSS should be initialized to zero\n"); + + vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val); + /* + * At present, KVM only supports a guest IA32_XSS value of 0. Verify + * that trying to set the guest IA32_XSS to an unsupported value fails. + * Also, in the future when a non-zero value succeeds check that + * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST. 
+ */ + for (i = 0; i < MSR_BITS; ++i) { + r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i); + TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS), + "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n"); + } + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh index 5511dddc5c2d..00a416fbc0ef 100755 --- a/tools/testing/selftests/lib/bitmap.sh +++ b/tools/testing/selftests/lib/bitmap.sh @@ -1,3 +1,3 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -$(dirname $0)/../kselftest_module.sh "bitmap" test_bitmap +$(dirname $0)/../kselftest/module.sh "bitmap" test_bitmap diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh index 43b28f24e453..370b79a9cb2e 100755 --- a/tools/testing/selftests/lib/prime_numbers.sh +++ b/tools/testing/selftests/lib/prime_numbers.sh @@ -1,4 +1,4 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # Checks fast/slow prime_number generation for inconsistencies -$(dirname $0)/../kselftest_module.sh "prime numbers" prime_numbers selftest=65536 +$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536 diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh index 2ffa61da0296..05f4544e87f9 100755 --- a/tools/testing/selftests/lib/printf.sh +++ b/tools/testing/selftests/lib/printf.sh @@ -1,4 +1,4 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # Tests the printf infrastructure using test_printf kernel module. 
-$(dirname $0)/../kselftest_module.sh "printf" test_printf +$(dirname $0)/../kselftest/module.sh "printf" test_printf diff --git a/tools/testing/selftests/lib/strscpy.sh b/tools/testing/selftests/lib/strscpy.sh index 71f2be6afba6..be60ef6e1a7f 100755 --- a/tools/testing/selftests/lib/strscpy.sh +++ b/tools/testing/selftests/lib/strscpy.sh @@ -1,3 +1,3 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0+ -$(dirname $0)/../kselftest_module.sh "strscpy*" test_strscpy +$(dirname $0)/../kselftest/module.sh "strscpy*" test_strscpy diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile index fd405402c3ff..3876d8d62494 100644 --- a/tools/testing/selftests/livepatch/Makefile +++ b/tools/testing/selftests/livepatch/Makefile @@ -4,6 +4,8 @@ TEST_PROGS_EXTENDED := functions.sh TEST_PROGS := \ test-livepatch.sh \ test-callbacks.sh \ - test-shadow-vars.sh + test-shadow-vars.sh \ + test-state.sh \ + test-ftrace.sh include ../lib.mk diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README index b73cd0e2dd51..621d325425c2 100644 --- a/tools/testing/selftests/livepatch/README +++ b/tools/testing/selftests/livepatch/README @@ -35,7 +35,7 @@ Adding tests ------------ See the common functions.sh file for the existing collection of utility -functions, most importantly set_dynamic_debug() and check_result(). The +functions, most importantly setup_config() and check_result(). The latter function greps the kernel's ring buffer for "livepatch:" and "test_klp" strings, so tests be sure to include one of those strings for result comparison. 
Other utility functions include general module diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config index 0dd7700464a8..ad23100cb27c 100644 --- a/tools/testing/selftests/livepatch/config +++ b/tools/testing/selftests/livepatch/config @@ -1 +1,3 @@ +CONFIG_LIVEPATCH=y +CONFIG_DYNAMIC_DEBUG=y CONFIG_TEST_LIVEPATCH=m diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index 79b0affd21fb..2aab9791791d 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -7,6 +7,9 @@ MAX_RETRIES=600 RETRY_INTERVAL=".1" # seconds +# Kselftest framework requirement - SKIP code is 4 +ksft_skip=4 + # log(msg) - write message to kernel log # msg - insightful words function log() { @@ -18,7 +21,16 @@ function log() { function skip() { log "SKIP: $1" echo "SKIP: $1" >&2 - exit 4 + exit $ksft_skip +} + +# root test +function is_root() { + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "skip all tests: must be run as root" >&2 + exit $ksft_skip + fi } # die(msg) - game over, man @@ -29,29 +41,45 @@ function die() { exit 1 } -function push_dynamic_debug() { - DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ - awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') +function push_config() { + DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ + awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') + FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled) } -function pop_dynamic_debug() { +function pop_config() { if [[ -n "$DYNAMIC_DEBUG" ]]; then echo -n "$DYNAMIC_DEBUG" > /sys/kernel/debug/dynamic_debug/control fi + if [[ -n "$FTRACE_ENABLED" ]]; then + sysctl kernel.ftrace_enabled="$FTRACE_ENABLED" &> /dev/null + fi } -# set_dynamic_debug() - save the current dynamic debug config and tweak -# it for the self-tests. 
Set a script exit trap -# that restores the original config. function set_dynamic_debug() { - push_dynamic_debug - trap pop_dynamic_debug EXIT INT TERM HUP cat <<-EOF > /sys/kernel/debug/dynamic_debug/control file kernel/livepatch/* +p func klp_try_switch_task -p EOF } +function set_ftrace_enabled() { + result=$(sysctl kernel.ftrace_enabled="$1" 2>&1 | paste --serial --delimiters=' ') + echo "livepatch: $result" > /dev/kmsg +} + +# setup_config - save the current config and set a script exit trap that +# restores the original config. Setup the dynamic debug +# for verbose livepatching output and turn on +# the ftrace_enabled sysctl. +function setup_config() { + is_root + push_config + set_dynamic_debug + set_ftrace_enabled 1 + trap pop_config EXIT INT TERM HUP +} + # loop_until(cmd) - loop a command until it is successful or $MAX_RETRIES, # sleep $RETRY_INTERVAL between attempts # cmd - command and its arguments to run diff --git a/tools/testing/selftests/livepatch/settings b/tools/testing/selftests/livepatch/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/livepatch/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index e97a9dcb73c7..a35289b13c9c 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -9,7 +9,7 @@ MOD_LIVEPATCH2=test_klp_callbacks_demo2 MOD_TARGET=test_klp_callbacks_mod MOD_TARGET_BUSY=test_klp_callbacks_busy -set_dynamic_debug +setup_config # TEST: target module before livepatch diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh new file mode 100755 index 000000000000..e2a76887f40a --- /dev/null +++ b/tools/testing/selftests/livepatch/test-ftrace.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 Joe Lawrence 
<joe.lawrence@redhat.com> + +. $(dirname $0)/functions.sh + +MOD_LIVEPATCH=test_klp_livepatch + +setup_config + + +# TEST: livepatch interaction with ftrace_enabled sysctl +# - turn ftrace_enabled OFF and verify livepatches can't load +# - turn ftrace_enabled ON and verify livepatch can load +# - verify that ftrace_enabled can't be turned OFF while a livepatch is loaded + +echo -n "TEST: livepatch interaction with ftrace_enabled sysctl ... " +dmesg -C + +set_ftrace_enabled 0 +load_failing_mod $MOD_LIVEPATCH + +set_ftrace_enabled 1 +load_lp $MOD_LIVEPATCH +if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then + echo -e "FAIL\n\n" + die "livepatch kselftest(s) failed" +fi + +set_ftrace_enabled 0 +if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then + echo -e "FAIL\n\n" + die "livepatch kselftest(s) failed" +fi +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "livepatch: kernel.ftrace_enabled = 0 +% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16) +livepatch: failed to patch object 'vmlinux' +livepatch: failed to enable patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Device or resource busy +livepatch: kernel.ftrace_enabled = 1 +% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource 
busy kernel.ftrace_enabled = 0 +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +exit 0 diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index f05268aea859..493e3df415a1 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -7,7 +7,7 @@ MOD_LIVEPATCH=test_klp_livepatch MOD_REPLACE=test_klp_atomic_replace -set_dynamic_debug +setup_config # TEST: basic function patching diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh index 04a37831e204..1aae73299114 100755 --- a/tools/testing/selftests/livepatch/test-shadow-vars.sh +++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh @@ -6,7 +6,7 @@ MOD_TEST=test_klp_shadow_vars -set_dynamic_debug +setup_config # TEST: basic shadow variable API diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh new file mode 100755 index 000000000000..a08212708115 --- /dev/null +++ b/tools/testing/selftests/livepatch/test-state.sh @@ -0,0 +1,179 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 SUSE + +. $(dirname $0)/functions.sh + +MOD_LIVEPATCH=test_klp_state +MOD_LIVEPATCH2=test_klp_state2 +MOD_LIVEPATCH3=test_klp_state3 + +setup_config + +# TEST: Loading and removing a module that modifies the system state + +echo -n "TEST: system state modification ... 
" +dmesg -C + +load_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: allocate_loglevel_state: allocating space to store console_loglevel +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel +livepatch: '$MOD_LIVEPATCH': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH: restore_console_loglevel: restoring console_loglevel +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +$MOD_LIVEPATCH: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH: free_loglevel_state: freeing space for the stored console_loglevel +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + + +# TEST: Take over system state change by a cumulative patch + +echo -n "TEST: taking over system state modification ... 
" +dmesg -C + +load_lp $MOD_LIVEPATCH +load_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 + +check_result "% modprobe $MOD_LIVEPATCH +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +$MOD_LIVEPATCH: pre_patch_callback: vmlinux +$MOD_LIVEPATCH: allocate_loglevel_state: allocating space to store console_loglevel +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +$MOD_LIVEPATCH: post_patch_callback: vmlinux +$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel +livepatch: '$MOD_LIVEPATCH': patching complete +% modprobe $MOD_LIVEPATCH2 +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +$MOD_LIVEPATCH2: allocate_loglevel_state: space to store console_loglevel already allocated +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +$MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change +livepatch: '$MOD_LIVEPATCH2': patching complete +% rmmod $MOD_LIVEPATCH +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH2: free_loglevel_state: freeing space for the stored console_loglevel +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2" + + +# TEST: Take over system state change by a cumulative patch + +echo -n "TEST: compatible cumulative livepatches ... 
" +dmesg -C + +load_lp $MOD_LIVEPATCH2 +load_lp $MOD_LIVEPATCH3 +unload_lp $MOD_LIVEPATCH2 +load_lp $MOD_LIVEPATCH2 +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH3 + +check_result "% modprobe $MOD_LIVEPATCH2 +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +$MOD_LIVEPATCH2: allocate_loglevel_state: allocating space to store console_loglevel +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel +livepatch: '$MOD_LIVEPATCH2': patching complete +% modprobe $MOD_LIVEPATCH3 +livepatch: enabling patch '$MOD_LIVEPATCH3' +livepatch: '$MOD_LIVEPATCH3': initializing patching transition +$MOD_LIVEPATCH3: pre_patch_callback: vmlinux +$MOD_LIVEPATCH3: allocate_loglevel_state: space to store console_loglevel already allocated +livepatch: '$MOD_LIVEPATCH3': starting patching transition +livepatch: '$MOD_LIVEPATCH3': completing patching transition +$MOD_LIVEPATCH3: post_patch_callback: vmlinux +$MOD_LIVEPATCH3: fix_console_loglevel: taking over the console_loglevel change +livepatch: '$MOD_LIVEPATCH3': patching complete +% rmmod $MOD_LIVEPATCH2 +% modprobe $MOD_LIVEPATCH2 +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +$MOD_LIVEPATCH2: allocate_loglevel_state: space to store console_loglevel already allocated +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +$MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change +livepatch: '$MOD_LIVEPATCH2': patching complete +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled 
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH2: free_loglevel_state: freeing space for the stored console_loglevel +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2 +% rmmod $MOD_LIVEPATCH3" + + +# TEST: Failure caused by incompatible cumulative livepatches + +echo -n "TEST: incompatible cumulative livepatches ... " +dmesg -C + +load_lp $MOD_LIVEPATCH2 +load_failing_mod $MOD_LIVEPATCH +disable_lp $MOD_LIVEPATCH2 +unload_lp $MOD_LIVEPATCH2 + +check_result "% modprobe $MOD_LIVEPATCH2 +livepatch: enabling patch '$MOD_LIVEPATCH2' +livepatch: '$MOD_LIVEPATCH2': initializing patching transition +$MOD_LIVEPATCH2: pre_patch_callback: vmlinux +$MOD_LIVEPATCH2: allocate_loglevel_state: allocating space to store console_loglevel +livepatch: '$MOD_LIVEPATCH2': starting patching transition +livepatch: '$MOD_LIVEPATCH2': completing patching transition +$MOD_LIVEPATCH2: post_patch_callback: vmlinux +$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel +livepatch: '$MOD_LIVEPATCH2': patching complete +% modprobe $MOD_LIVEPATCH +livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches. 
+modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Invalid argument +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition +$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux +$MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel +livepatch: '$MOD_LIVEPATCH2': starting unpatching transition +livepatch: '$MOD_LIVEPATCH2': completing unpatching transition +$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux +$MOD_LIVEPATCH2: free_loglevel_state: freeing space for the stored console_loglevel +livepatch: '$MOD_LIVEPATCH2': unpatching complete +% rmmod $MOD_LIVEPATCH2" + +exit 0 diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile new file mode 100644 index 000000000000..1bcc9ee990eb --- /dev/null +++ b/tools/testing/selftests/lkdtm/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for LKDTM regression tests + +include ../lib.mk + +# NOTE: $(OUTPUT) won't get default value if used before lib.mk +TEST_FILES := tests.txt +TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//')) +all: $(TEST_GEN_PROGS) + +$(OUTPUT)/%: run.sh tests.txt + install -m 0744 run.sh $@ diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config new file mode 100644 index 000000000000..d874990e442b --- /dev/null +++ b/tools/testing/selftests/lkdtm/config @@ -0,0 +1 @@ +CONFIG_LKDTM=y diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh new file mode 100755 index 000000000000..dadf819148a4 --- /dev/null +++ b/tools/testing/selftests/lkdtm/run.sh @@ -0,0 +1,92 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# This reads tests.txt for the list of LKDTM tests to invoke. Any marked +# with a leading "#" are skipped. 
The rest of the line after the +# test name is either the text to look for in dmesg for a "success", +# or the rationale for why a test is marked to be skipped. +# +set -e +TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT +KSELFTEST_SKIP_TEST=4 + +# Verify we have LKDTM available in the kernel. +if [ ! -r $TRIGGER ] ; then + /sbin/modprobe -q lkdtm || true + if [ ! -r $TRIGGER ] ; then + echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)" + else + echo "Cannot write $TRIGGER (need to run as root?)" + fi + # Skip this test + exit $KSELFTEST_SKIP_TEST +fi + +# Figure out which test to run from our script name. +test=$(basename $0 .sh) +# Look up details about the test from master list of LKDTM tests. +line=$(egrep '^#?'"$test"'\b' tests.txt) +if [ -z "$line" ]; then + echo "Skipped: missing test '$test' in tests.txt" + exit $KSELFTEST_SKIP_TEST +fi +# Check that the test is known to LKDTM. +if ! egrep -q '^'"$test"'$' "$TRIGGER" ; then + echo "Skipped: test '$test' missing in $TRIGGER!" + exit $KSELFTEST_SKIP_TEST +fi + +# Extract notes/expected output from test list. +test=$(echo "$line" | cut -d" " -f1) +if echo "$line" | grep -q ' ' ; then + expect=$(echo "$line" | cut -d" " -f2-) +else + expect="" +fi + +# If the test is commented out, report a skip +if echo "$test" | grep -q '^#' ; then + test=$(echo "$test" | cut -c2-) + if [ -z "$expect" ]; then + expect="crashes entire system" + fi + echo "Skipping $test: $expect" + exit $KSELFTEST_SKIP_TEST +fi + +# If no expected output given, assume an Oops with back trace is success. +if [ -z "$expect" ]; then + expect="call trace:" +fi + +# Clear out dmesg for output reporting +dmesg -c >/dev/null + +# Prepare log for report checking +LOG=$(mktemp --tmpdir -t lkdtm-XXXXXX) +cleanup() { + rm -f "$LOG" +} +trap cleanup EXIT + +# Most shells yell about signals and we're expecting the "cat" process +# to usually be killed by the kernel. So we have to run it in a sub-shell +# and silence errors. 
+($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true + +# Record and dump the results +dmesg -c >"$LOG" +cat "$LOG" +# Check for expected output +if egrep -qi "$expect" "$LOG" ; then + echo "$test: saw '$expect': ok" + exit 0 +else + if egrep -qi XFAIL: "$LOG" ; then + echo "$test: saw 'XFAIL': [SKIP]" + exit $KSELFTEST_SKIP_TEST + else + echo "$test: missing '$expect': [FAIL]" + exit 1 + fi +fi diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt new file mode 100644 index 000000000000..92ca32143ae5 --- /dev/null +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -0,0 +1,71 @@ +#PANIC +BUG kernel BUG at +WARNING WARNING: +WARNING_MESSAGE message trigger +EXCEPTION +#LOOP Hangs the system +#EXHAUST_STACK Corrupts memory on failure +#CORRUPT_STACK Crashes entire system on success +#CORRUPT_STACK_STRONG Crashes entire system on success +CORRUPT_LIST_ADD list_add corruption +CORRUPT_LIST_DEL list_del corruption +CORRUPT_USER_DS Invalid address limit on user-mode return +STACK_GUARD_PAGE_LEADING +STACK_GUARD_PAGE_TRAILING +UNSET_SMEP CR4 bits went missing +DOUBLE_FAULT +UNALIGNED_LOAD_STORE_WRITE +#OVERWRITE_ALLOCATION Corrupts memory on failure +#WRITE_AFTER_FREE Corrupts memory on failure +READ_AFTER_FREE +#WRITE_BUDDY_AFTER_FREE Corrupts memory on failure +READ_BUDDY_AFTER_FREE +SLAB_FREE_DOUBLE +SLAB_FREE_CROSS +SLAB_FREE_PAGE +#SOFTLOCKUP Hangs the system +#HARDLOCKUP Hangs the system +#SPINLOCKUP Hangs the system +#HUNG_TASK Hangs the system +EXEC_DATA +EXEC_STACK +EXEC_KMALLOC +EXEC_VMALLOC +EXEC_RODATA +EXEC_USERSPACE +EXEC_NULL +ACCESS_USERSPACE +ACCESS_NULL +WRITE_RO +WRITE_RO_AFTER_INIT +WRITE_KERN +REFCOUNT_INC_OVERFLOW +REFCOUNT_ADD_OVERFLOW +REFCOUNT_INC_NOT_ZERO_OVERFLOW +REFCOUNT_ADD_NOT_ZERO_OVERFLOW +REFCOUNT_DEC_ZERO +REFCOUNT_DEC_NEGATIVE Negative detected: saturated +REFCOUNT_DEC_AND_TEST_NEGATIVE Negative detected: saturated +REFCOUNT_SUB_AND_TEST_NEGATIVE Negative detected: saturated 
+REFCOUNT_INC_ZERO +REFCOUNT_ADD_ZERO +REFCOUNT_INC_SATURATED Saturation detected: still saturated +REFCOUNT_DEC_SATURATED Saturation detected: still saturated +REFCOUNT_ADD_SATURATED Saturation detected: still saturated +REFCOUNT_INC_NOT_ZERO_SATURATED +REFCOUNT_ADD_NOT_ZERO_SATURATED +REFCOUNT_DEC_AND_TEST_SATURATED Saturation detected: still saturated +REFCOUNT_SUB_AND_TEST_SATURATED Saturation detected: still saturated +#REFCOUNT_TIMING timing only +#ATOMIC_TIMING timing only +USERCOPY_HEAP_SIZE_TO +USERCOPY_HEAP_SIZE_FROM +USERCOPY_HEAP_WHITELIST_TO +USERCOPY_HEAP_WHITELIST_FROM +USERCOPY_STACK_FRAME_TO +USERCOPY_STACK_FRAME_FROM +USERCOPY_STACK_BEYOND +USERCOPY_KERNEL +USERCOPY_KERNEL_DS +STACKLEAK_ERASING OK: the rest of the thread stack is properly erased +CFI_FORWARD_PROTO diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore index 020c44f49a9e..f2f7ec0a99b4 100644 --- a/tools/testing/selftests/membarrier/.gitignore +++ b/tools/testing/selftests/membarrier/.gitignore @@ -1 +1,2 @@ -membarrier_test +membarrier_test_multi_thread +membarrier_test_single_thread diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile index 97e3bdf3d1e9..34d1c81a2324 100644 --- a/tools/testing/selftests/membarrier/Makefile +++ b/tools/testing/selftests/membarrier/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -g -I../../../../usr/include/ +LDLIBS += -lpthread -TEST_GEN_PROGS := membarrier_test +TEST_GEN_PROGS := membarrier_test_single_thread \ + membarrier_test_multi_thread include ../lib.mk - diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test_impl.h index 70b4ddbf126b..186be69f0a59 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h @@ -1,10 +1,11 @@ -// SPDX-License-Identifier: GPL-2.0 +/* 
SPDX-License-Identifier: GPL-2.0 */ #define _GNU_SOURCE #include <linux/membarrier.h> #include <syscall.h> #include <stdio.h> #include <errno.h> #include <string.h> +#include <pthread.h> #include "../kselftest.h" @@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void) return 0; } -static int test_membarrier(void) +static int test_membarrier_fail(void) { int status; @@ -233,10 +234,27 @@ static int test_membarrier(void) status = test_membarrier_flags_fail(); if (status) return status; - status = test_membarrier_global_success(); + status = test_membarrier_private_expedited_fail(); if (status) return status; - status = test_membarrier_private_expedited_fail(); + status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0); + if (status < 0) { + ksft_test_result_fail("sys_membarrier() failed\n"); + return status; + } + if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { + status = test_membarrier_private_expedited_sync_core_fail(); + if (status) + return status; + } + return 0; +} + +static int test_membarrier_success(void) +{ + int status; + + status = test_membarrier_global_success(); if (status) return status; status = test_membarrier_register_private_expedited_success(); @@ -251,9 +269,6 @@ static int test_membarrier(void) return status; } if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) { - status = test_membarrier_private_expedited_sync_core_fail(); - if (status) - return status; status = test_membarrier_register_private_expedited_sync_core_success(); if (status) return status; @@ -300,14 +315,3 @@ static int test_membarrier_query(void) ksft_test_result_pass("sys_membarrier available\n"); return 0; } - -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_set_plan(13); - - test_membarrier_query(); - test_membarrier(); - - return ksft_exit_pass(); -} diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c new file mode 100644 index 
000000000000..ac5613e5b0eb --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/membarrier.h> +#include <syscall.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include "membarrier_test_impl.h" + +static int thread_ready, thread_quit; +static pthread_mutex_t test_membarrier_thread_mutex = + PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t test_membarrier_thread_cond = + PTHREAD_COND_INITIALIZER; + +void *test_membarrier_thread(void *arg) +{ + pthread_mutex_lock(&test_membarrier_thread_mutex); + thread_ready = 1; + pthread_cond_broadcast(&test_membarrier_thread_cond); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + pthread_mutex_lock(&test_membarrier_thread_mutex); + while (!thread_quit) + pthread_cond_wait(&test_membarrier_thread_cond, + &test_membarrier_thread_mutex); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + return NULL; +} + +static int test_mt_membarrier(void) +{ + int i; + pthread_t test_thread; + + pthread_create(&test_thread, NULL, + test_membarrier_thread, NULL); + + pthread_mutex_lock(&test_membarrier_thread_mutex); + while (!thread_ready) + pthread_cond_wait(&test_membarrier_thread_cond, + &test_membarrier_thread_mutex); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + test_membarrier_fail(); + + test_membarrier_success(); + + pthread_mutex_lock(&test_membarrier_thread_mutex); + thread_quit = 1; + pthread_cond_broadcast(&test_membarrier_thread_cond); + pthread_mutex_unlock(&test_membarrier_thread_mutex); + + pthread_join(test_thread, NULL); + + return 0; +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(13); + + test_membarrier_query(); + + /* Multi-threaded */ + test_mt_membarrier(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c 
b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c new file mode 100644 index 000000000000..c1c963902854 --- /dev/null +++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <linux/membarrier.h> +#include <syscall.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <pthread.h> + +#include "membarrier_test_impl.h" + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(13); + + test_membarrier_query(); + + test_membarrier_fail(); + + test_membarrier_success(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index c67d32eeb668..334a7eea2004 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -290,6 +290,40 @@ static void mfd_assert_read_shared(int fd) munmap(p, mfd_def_size); } +static void mfd_assert_fork_private_write(int fd) +{ + int *p; + pid_t pid; + + p = mmap(NULL, + mfd_def_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + fd, + 0); + if (p == MAP_FAILED) { + printf("mmap() failed: %m\n"); + abort(); + } + + p[0] = 22; + + pid = fork(); + if (pid == 0) { + p[0] = 33; + exit(0); + } else { + waitpid(pid, NULL, 0); + + if (p[0] != 22) { + printf("MAP_PRIVATE copy-on-write failed: %m\n"); + abort(); + } + } + + munmap(p, mfd_def_size); +} + static void mfd_assert_write(int fd) { ssize_t l; @@ -760,6 +794,8 @@ static void test_seal_future_write(void) mfd_assert_read_shared(fd2); mfd_fail_write(fd2); + mfd_assert_fork_private_write(fd); + munmap(p, mfd_def_size); close(fd2); close(fd); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index c7cced739c34..ecc52d4c034d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -21,3 +21,5 @@ ipv6_flowlabel ipv6_flowlabel_mgr so_txtime 
tcp_fastopen_backup_key +nettest +fin_ack_lat diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 1b24e36b4047..b5694196430a 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -10,14 +10,16 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh +TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh +TEST_PROGS += fin_ack_lat.sh TEST_PROGS_EXTENDED := in_netns.sh -TEST_GEN_FILES = socket +TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr TEST_GEN_FILES += tcp_fastopen_backup_key +TEST_GEN_FILES += fin_ack_lat TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh new file mode 100755 index 000000000000..4254ddc3f70b --- /dev/null +++ b/tools/testing/selftests/net/altnames.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/forwarding + +ALL_TESTS="altnames_test" +NUM_NETIFS=0 +source $lib_dir/lib.sh + +DUMMY_DEV=dummytest +SHORT_NAME=shortname +LONG_NAME=someveryveryveryveryveryverylongname + +altnames_test() +{ + RET=0 + local output + local name + + ip link property add $DUMMY_DEV altname $SHORT_NAME + check_err $? 
"Failed to add short alternative name" + + output=$(ip -j -p link show $SHORT_NAME) + check_err $? "Failed to do link show with short alternative name" + + name=$(echo $output | jq -e -r ".[0].altnames[0]") + check_err $? "Failed to get short alternative name from link show JSON" + + [ "$name" == "$SHORT_NAME" ] + check_err $? "Got unexpected short alternative name from link show JSON" + + ip -j -p link show $DUMMY_DEV &>/dev/null + check_err $? "Failed to do link show with original name" + + ip link property add $DUMMY_DEV altname $LONG_NAME + check_err $? "Failed to add long alternative name" + + output=$(ip -j -p link show $LONG_NAME) + check_err $? "Failed to do link show with long alternative name" + + name=$(echo $output | jq -e -r ".[0].altnames[1]") + check_err $? "Failed to get long alternative name from link show JSON" + + [ "$name" == "$LONG_NAME" ] + check_err $? "Got unexpected long alternative name from link show JSON" + + ip link property del $DUMMY_DEV altname $SHORT_NAME + check_err $? "Failed to add short alternative name" + + ip -j -p link show $SHORT_NAME &>/dev/null + check_fail $? "Unexpected success while trying to do link show with deleted short alternative name" + + # long name is left there on purpose to be removed alongside the device + + log_test "altnames test" +} + +setup_prepare() +{ + ip link add name $DUMMY_DEV type dummy +} + +cleanup() +{ + pre_cleanup + ip link del name $DUMMY_DEV +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh new file mode 100755 index 000000000000..fb5c55dd6df8 --- /dev/null +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -0,0 +1,3890 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved. +# +# IPv4 and IPv6 functional tests focusing on VRF and routing lookups +# for various permutations: +# 1. 
icmp, tcp, udp and netfilter +# 2. client, server, no-server +# 3. global address on interface +# 4. global address on 'lo' +# 5. remote and local traffic +# 6. VRF and non-VRF permutations +# +# Setup: +# ns-A | ns-B +# No VRF case: +# [ lo ] [ eth1 ]---|---[ eth1 ] [ lo ] +# remote address +# VRF case: +# [ red ]---[ eth1 ]---|---[ eth1 ] [ lo ] +# +# ns-A: +# eth1: 172.16.1.1/24, 2001:db8:1::1/64 +# lo: 127.0.0.1/8, ::1/128 +# 172.16.2.1/32, 2001:db8:2::1/128 +# red: 127.0.0.1/8, ::1/128 +# 172.16.3.1/32, 2001:db8:3::1/128 +# +# ns-B: +# eth1: 172.16.1.2/24, 2001:db8:1::2/64 +# lo2: 127.0.0.1/8, ::1/128 +# 172.16.2.2/32, 2001:db8:2::2/128 +# +# ns-A to ns-C connection - only for VRF and same config +# as ns-A to ns-B +# +# server / client nomenclature relative to ns-A + +VERBOSE=0 + +NSA_DEV=eth1 +NSA_DEV2=eth2 +NSB_DEV=eth1 +NSC_DEV=eth2 +VRF=red +VRF_TABLE=1101 + +# IPv4 config +NSA_IP=172.16.1.1 +NSB_IP=172.16.1.2 +VRF_IP=172.16.3.1 +NS_NET=172.16.1.0/24 + +# IPv6 config +NSA_IP6=2001:db8:1::1 +NSB_IP6=2001:db8:1::2 +VRF_IP6=2001:db8:3::1 +NS_NET6=2001:db8:1::/120 + +NSA_LO_IP=172.16.2.1 +NSB_LO_IP=172.16.2.2 +NSA_LO_IP6=2001:db8:2::1 +NSB_LO_IP6=2001:db8:2::2 + +MD5_PW=abc123 +MD5_WRONG_PW=abc1234 + +MCAST=ff02::1 +# set after namespace create +NSA_LINKIP6= +NSB_LINKIP6= + +NSA=ns-A +NSB=ns-B +NSC=ns-C + +NSA_CMD="ip netns exec ${NSA}" +NSB_CMD="ip netns exec ${NSB}" +NSC_CMD="ip netns exec ${NSC}" + +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + +################################################################################ +# utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + [ "${VERBOSE}" = "1" ] && echo + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "TEST: %-70s [ OK ]\n" "${msg}" + else + nfail=$((nfail+1)) + printf "TEST: %-70s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = 
"q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + kill_procs +} + +log_test_addr() +{ + local addr=$1 + local rc=$2 + local expected=$3 + local msg="$4" + local astr + + astr=$(addr2str ${addr}) + log_test $rc $expected "$msg - ${astr}" +} + +log_section() +{ + echo + echo "###########################################################################" + echo "$*" + echo "###########################################################################" + echo +} + +log_subsection() +{ + echo + echo "#################################################################" + echo "$*" + echo +} + +log_start() +{ + # make sure we have no test instances running + kill_procs + + if [ "${VERBOSE}" = "1" ]; then + echo + echo "#######################################################" + fi +} + +log_debug() +{ + if [ "${VERBOSE}" = "1" ]; then + echo + echo "$*" + echo + fi +} + +show_hint() +{ + if [ "${VERBOSE}" = "1" ]; then + echo "HINT: $*" + echo + fi +} + +kill_procs() +{ + killall nettest ping ping6 >/dev/null 2>&1 + sleep 1 +} + +do_run_cmd() +{ + local cmd="$*" + local out + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${cmd}" + fi + + out=$($cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo "$out" + fi + + return $rc +} + +run_cmd() +{ + do_run_cmd ${NSA_CMD} $* +} + +run_cmd_nsb() +{ + do_run_cmd ${NSB_CMD} $* +} + +run_cmd_nsc() +{ + do_run_cmd ${NSC_CMD} $* +} + +setup_cmd() +{ + local cmd="$*" + local rc + + run_cmd ${cmd} + rc=$? + if [ $rc -ne 0 ]; then + # show user the command if not done so already + if [ "$VERBOSE" = "0" ]; then + echo "setup command: $cmd" + fi + echo "failed. stopping tests" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue" + read a + fi + exit $rc + fi +} + +setup_cmd_nsb() +{ + local cmd="$*" + local rc + + run_cmd_nsb ${cmd} + rc=$? 
+ if [ $rc -ne 0 ]; then + # show user the command if not done so already + if [ "$VERBOSE" = "0" ]; then + echo "setup command: $cmd" + fi + echo "failed. stopping tests" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue" + read a + fi + exit $rc + fi +} + +# set sysctl values in NS-A +set_sysctl() +{ + echo "SYSCTL: $*" + echo + run_cmd sysctl -q -w $* +} + +################################################################################ +# Setup for tests + +addr2str() +{ + case "$1" in + 127.0.0.1) echo "loopback";; + ::1) echo "IPv6 loopback";; + + ${NSA_IP}) echo "ns-A IP";; + ${NSA_IP6}) echo "ns-A IPv6";; + ${NSA_LO_IP}) echo "ns-A loopback IP";; + ${NSA_LO_IP6}) echo "ns-A loopback IPv6";; + ${NSA_LINKIP6}|${NSA_LINKIP6}%*) echo "ns-A IPv6 LLA";; + + ${NSB_IP}) echo "ns-B IP";; + ${NSB_IP6}) echo "ns-B IPv6";; + ${NSB_LO_IP}) echo "ns-B loopback IP";; + ${NSB_LO_IP6}) echo "ns-B loopback IPv6";; + ${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";; + + ${VRF_IP}) echo "VRF IP";; + ${VRF_IP6}) echo "VRF IPv6";; + + ${MCAST}%*) echo "multicast IP";; + + *) echo "unknown";; + esac +} + +get_linklocal() +{ + local ns=$1 + local dev=$2 + local addr + + addr=$(ip -netns ${ns} -6 -br addr show dev ${dev} | \ + awk '{ + for (i = 3; i <= NF; ++i) { + if ($i ~ /^fe80/) + print $i + } + }' + ) + addr=${addr/\/*} + + [ -z "$addr" ] && return 1 + + echo $addr + + return 0 +} + +################################################################################ +# create namespaces and vrf + +create_vrf() +{ + local ns=$1 + local vrf=$2 + local table=$3 + local addr=$4 + local addr6=$5 + + ip -netns ${ns} link add ${vrf} type vrf table ${table} + ip -netns ${ns} link set ${vrf} up + ip -netns ${ns} route add vrf ${vrf} unreachable default metric 8192 + ip -netns ${ns} -6 route add vrf ${vrf} unreachable default metric 8192 + + ip -netns ${ns} addr add 127.0.0.1/8 dev ${vrf} + ip -netns ${ns} -6 addr add ::1 dev ${vrf} nodad + if [ "${addr}" 
!= "-" ]; then + ip -netns ${ns} addr add dev ${vrf} ${addr} + fi + if [ "${addr6}" != "-" ]; then + ip -netns ${ns} -6 addr add dev ${vrf} ${addr6} + fi + + ip -netns ${ns} ru del pref 0 + ip -netns ${ns} ru add pref 32765 from all lookup local + ip -netns ${ns} -6 ru del pref 0 + ip -netns ${ns} -6 ru add pref 32765 from all lookup local +} + +create_ns() +{ + local ns=$1 + local addr=$2 + local addr6=$3 + + ip netns add ${ns} + + ip -netns ${ns} link set lo up + if [ "${addr}" != "-" ]; then + ip -netns ${ns} addr add dev lo ${addr} + fi + if [ "${addr6}" != "-" ]; then + ip -netns ${ns} -6 addr add dev lo ${addr6} + fi + + ip -netns ${ns} ro add unreachable default metric 8192 + ip -netns ${ns} -6 ro add unreachable default metric 8192 + + ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 +} + +# create veth pair to connect namespaces and apply addresses. +connect_ns() +{ + local ns1=$1 + local ns1_dev=$2 + local ns1_addr=$3 + local ns1_addr6=$4 + local ns2=$5 + local ns2_dev=$6 + local ns2_addr=$7 + local ns2_addr6=$8 + + ip -netns ${ns1} li add ${ns1_dev} type veth peer name tmp + ip -netns ${ns1} li set ${ns1_dev} up + ip -netns ${ns1} li set tmp netns ${ns2} name ${ns2_dev} + ip -netns ${ns2} li set ${ns2_dev} up + + if [ "${ns1_addr}" != "-" ]; then + ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr} + ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr} + fi + + if [ "${ns1_addr6}" != "-" ]; then + ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr6} + ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr6} + fi +} + +cleanup() +{ + # explicit cleanups to check those code paths + ip netns | grep -q ${NSA} + if [ $? 
-eq 0 ]; then + ip -netns ${NSA} link delete ${VRF} + ip -netns ${NSA} ro flush table ${VRF_TABLE} + + ip -netns ${NSA} addr flush dev ${NSA_DEV} + ip -netns ${NSA} -6 addr flush dev ${NSA_DEV} + ip -netns ${NSA} link set dev ${NSA_DEV} down + ip -netns ${NSA} link del dev ${NSA_DEV} + + ip netns del ${NSA} + fi + + ip netns del ${NSB} + ip netns del ${NSC} >/dev/null 2>&1 +} + +setup() +{ + local with_vrf=${1} + + # make sure we are starting with a clean slate + kill_procs + cleanup 2>/dev/null + + log_debug "Configuring network namespaces" + set -e + + create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128 + create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128 + connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSB} ${NSB_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 + + NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV}) + NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV}) + + # tell ns-A how to get to remote addresses of ns-B + if [ "${with_vrf}" = "yes" ]; then + create_vrf ${NSA} ${VRF} ${VRF_TABLE} ${VRF_IP} ${VRF_IP6} + + ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF} + ip -netns ${NSA} ro add vrf ${VRF} ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} + ip -netns ${NSA} -6 ro add vrf ${VRF} ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} + + ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV} + ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} + + # some VRF tests use ns-C which has the same config as + # ns-B but for a device NOT in the VRF + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 + else + ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} + ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} + fi + + + # tell ns-B how to get to remote addresses of ns-A + ip -netns ${NSB} ro add ${NSA_LO_IP}/32 via ${NSA_IP} dev ${NSB_DEV} + ip -netns ${NSB} ro add ${NSA_LO_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} + + set +e 
+ + sleep 1 +} + +################################################################################ +# IPv4 + +ipv4_ping_novrf() +{ + local a + + # + # out + # + for a in ${NSB_IP} ${NSB_LO_IP} + do + log_start + run_cmd ping -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping out" + + log_start + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping out, device bind" + + log_start + run_cmd ping -c1 -w1 -I ${NSA_LO_IP} ${a} + log_test_addr ${a} $? 0 "ping out, address bind" + done + + # + # in + # + for a in ${NSA_IP} ${NSA_LO_IP} + do + log_start + run_cmd_nsb ping -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping in" + done + + # + # local traffic + # + for a in ${NSA_IP} ${NSA_LO_IP} 127.0.0.1 + do + log_start + run_cmd ping -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping local" + done + + # + # local traffic, socket bound to device + # + # address on device + a=${NSA_IP} + log_start + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping local, device bind" + + # loopback addresses not reachable from device bind + # fails in a really weird way though because ipv4 special cases + # route lookups with oif set. + for a in ${NSA_LO_IP} 127.0.0.1 + do + log_start + show_hint "Fails since address on loopback device is out of device scope" + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 1 "ping local, device bind" + done + + # + # ip rule blocks reachability to remote address + # + log_start + setup_cmd ip rule add pref 32765 from all lookup local + setup_cmd ip rule del pref 0 from all lookup local + setup_cmd ip rule add pref 50 to ${NSB_LO_IP} prohibit + setup_cmd ip rule add pref 51 from ${NSB_IP} prohibit + + a=${NSB_LO_IP} + run_cmd ping -c1 -w1 ${a} + log_test_addr ${a} $? 
2 "ping out, blocked by rule" + + # NOTE: ipv4 actually allows the lookup to fail and yet still create + # a viable rtable if the oif (e.g., bind to device) is set, so this + # case succeeds despite the rule + # run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + + a=${NSA_LO_IP} + log_start + show_hint "Response generates ICMP (or arp request is ignored) due to ip rule" + run_cmd_nsb ping -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, blocked by rule" + + [ "$VERBOSE" = "1" ] && echo + setup_cmd ip rule del pref 32765 from all lookup local + setup_cmd ip rule add pref 0 from all lookup local + setup_cmd ip rule del pref 50 to ${NSB_LO_IP} prohibit + setup_cmd ip rule del pref 51 from ${NSB_IP} prohibit + + # + # route blocks reachability to remote address + # + log_start + setup_cmd ip route replace unreachable ${NSB_LO_IP} + setup_cmd ip route replace unreachable ${NSB_IP} + + a=${NSB_LO_IP} + run_cmd ping -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, blocked by route" + + # NOTE: ipv4 actually allows the lookup to fail and yet still create + # a viable rtable if the oif (e.g., bind to device) is set, so this + # case succeeds despite not having a route for the address + # run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + + a=${NSA_LO_IP} + log_start + show_hint "Response is dropped (or arp request is ignored) due to ip route" + run_cmd_nsb ping -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, blocked by route" + + # + # remove 'remote' routes; fallback to default + # + log_start + setup_cmd ip ro del ${NSB_LO_IP} + + a=${NSB_LO_IP} + run_cmd ping -c1 -w1 ${a} + log_test_addr ${a} $? 
2 "ping out, unreachable default route" + + # NOTE: ipv4 actually allows the lookup to fail and yet still create + # a viable rtable if the oif (e.g., bind to device) is set, so this + # case succeeds despite not having a route for the address + # run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} +} + +ipv4_ping_vrf() +{ + local a + + # should default on; does not exist on older kernels + set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null + + # + # out + # + for a in ${NSB_IP} ${NSB_LO_IP} + do + log_start + run_cmd ping -c1 -w1 -I ${VRF} ${a} + log_test_addr ${a} $? 0 "ping out, VRF bind" + + log_start + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping out, device bind" + + log_start + run_cmd ip vrf exec ${VRF} ping -c1 -w1 -I ${NSA_IP} ${a} + log_test_addr ${a} $? 0 "ping out, vrf device + dev address bind" + + log_start + run_cmd ip vrf exec ${VRF} ping -c1 -w1 -I ${VRF_IP} ${a} + log_test_addr ${a} $? 0 "ping out, vrf device + vrf address bind" + done + + # + # in + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd_nsb ping -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping in" + done + + # + # local traffic, local address + # + for a in ${NSA_IP} ${VRF_IP} 127.0.0.1 + do + log_start + show_hint "Source address should be ${a}" + run_cmd ping -c1 -w1 -I ${VRF} ${a} + log_test_addr ${a} $? 0 "ping local, VRF bind" + done + + # + # local traffic, socket bound to device + # + # address on device + a=${NSA_IP} + log_start + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping local, device bind" + + # vrf device is out of scope + for a in ${VRF_IP} 127.0.0.1 + do + log_start + show_hint "Fails since address on vrf device is out of device scope" + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 
1 "ping local, device bind" + done + + # + # ip rule blocks address + # + log_start + setup_cmd ip rule add pref 50 to ${NSB_LO_IP} prohibit + setup_cmd ip rule add pref 51 from ${NSB_IP} prohibit + + a=${NSB_LO_IP} + run_cmd ping -c1 -w1 -I ${VRF} ${a} + log_test_addr ${a} $? 2 "ping out, vrf bind, blocked by rule" + + log_start + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping out, device bind, blocked by rule" + + a=${NSA_LO_IP} + log_start + show_hint "Response lost due to ip rule" + run_cmd_nsb ping -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, blocked by rule" + + [ "$VERBOSE" = "1" ] && echo + setup_cmd ip rule del pref 50 to ${NSB_LO_IP} prohibit + setup_cmd ip rule del pref 51 from ${NSB_IP} prohibit + + # + # remove 'remote' routes; fallback to default + # + log_start + setup_cmd ip ro del vrf ${VRF} ${NSB_LO_IP} + + a=${NSB_LO_IP} + run_cmd ping -c1 -w1 -I ${VRF} ${a} + log_test_addr ${a} $? 2 "ping out, vrf bind, unreachable route" + + log_start + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping out, device bind, unreachable route" + + a=${NSA_LO_IP} + log_start + show_hint "Response lost by unreachable route" + run_cmd_nsb ping -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, unreachable route" +} + +ipv4_ping() +{ + log_section "IPv4 ping" + + log_subsection "No VRF" + setup + set_sysctl net.ipv4.raw_l3mdev_accept=0 2>/dev/null + ipv4_ping_novrf + setup + set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null + ipv4_ping_novrf + + log_subsection "With VRF" + setup "yes" + ipv4_ping_vrf +} + +################################################################################ +# IPv4 TCP + +# +# MD5 tests without VRF +# +ipv4_tcp_md5_novrf() +{ + # + # single address + # + + # basic use case + log_start + run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 
0 "MD5: Single address config" + + # client sends MD5, server not configured + log_start + show_hint "Should timeout due to MD5 mismatch" + run_cmd nettest -s & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: Server no config, client uses password" + + # wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: Client uses wrong password" + + # client from different address + log_start + show_hint "Should timeout due to MD5 mismatch" + run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: Client address does not match address configured with password" + + # + # MD5 extension - prefix length + # + + # client in prefix + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 0 "MD5: Prefix config" + + # client in prefix, wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: Prefix config, client uses wrong password" + + # client outside of prefix + log_start + show_hint "Should timeout due to MD5 mismatch" + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: Prefix config, client address not in configured prefix" +} + +# +# MD5 tests with VRF +# +ipv4_tcp_md5() +{ + # + # single address + # + + # basic use case + log_start + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 
0 "MD5: VRF: Single address config" + + # client sends MD5, server not configured + log_start + show_hint "Should timeout since server does not have MD5 auth" + run_cmd nettest -s -d ${VRF} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Server no config, client uses password" + + # wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Client uses wrong password" + + # client from different address + log_start + show_hint "Should timeout since server config differs from client" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Client address does not match address configured with password" + + # + # MD5 extension - prefix length + # + + # client in prefix + log_start + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 0 "MD5: VRF: Prefix config" + + # client in prefix, wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" + + # client outside of prefix + log_start + show_hint "Should timeout since client address is outside of prefix" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW} + log_test $? 
2 "MD5: VRF: Prefix config, client address not in configured prefix" + + # + # duplicate config between default VRF and a VRF + # + + log_start + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" + + log_start + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" + + log_start + show_hint "Should timeout since client in default VRF uses VRF password" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" + + log_start + show_hint "Should timeout since client in VRF uses default VRF password" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" + + log_start + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" + + log_start + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 
0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" + + log_start + show_hint "Should timeout since client in default VRF uses VRF password" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" + + log_start + show_hint "Should timeout since client in VRF uses default VRF password" + run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} & + run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" + + # + # negative tests + # + log_start + run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP} + log_test $? 1 "MD5: VRF: Device must be a VRF - single address" + + log_start + run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} + log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" + +} + +ipv4_tcp_novrf() +{ + local a + + # + # server tests + # + for a in ${NSA_IP} ${NSA_LO_IP} + do + log_start + run_cmd nettest -s & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 0 "Global server" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -d ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 0 "Device server" + + # verify TCP reset sent and received + for a in ${NSA_IP} ${NSA_LO_IP} + do + log_start + show_hint "Should fail 'Connection refused' since there is no server" + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # + # client + # + for a in ${NSB_IP} ${NSB_LO_IP} + do + log_start + run_cmd_nsb nettest -s & + sleep 1 + run_cmd nettest -r ${a} -0 ${NSA_IP} + log_test_addr ${a} $? 
0 "Client" + + log_start + run_cmd_nsb nettest -s & + sleep 1 + run_cmd nettest -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 0 "Client, device bind" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -r ${a} + log_test_addr ${a} $? 1 "No server, unbound client" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "No server, device client" + done + + # + # local address tests + # + for a in ${NSA_IP} ${NSA_LO_IP} 127.0.0.1 + do + log_start + run_cmd nettest -s & + sleep 1 + run_cmd nettest -r ${a} -0 ${a} -1 ${a} + log_test_addr ${a} $? 0 "Global server, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -d ${NSA_DEV} & + sleep 1 + run_cmd nettest -r ${a} -0 ${a} + log_test_addr ${a} $? 0 "Device server, unbound client, local connection" + + for a in ${NSA_LO_IP} 127.0.0.1 + do + log_start + show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" + run_cmd nettest -s -d ${NSA_DEV} & + sleep 1 + run_cmd nettest -r ${a} + log_test_addr ${a} $? 1 "Device server, unbound client, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s & + sleep 1 + run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 0 "Global server, device client, local connection" + + for a in ${NSA_LO_IP} 127.0.0.1 + do + log_start + show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" + run_cmd nettest -s & + sleep 1 + run_cmd nettest -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "Global server, device client, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} + log_test_addr ${a} $? 
0 "Device server, device client, local connection" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 1 "No server, device client, local conn" + + ipv4_tcp_md5_novrf +} + +ipv4_tcp_vrf() +{ + local a + + # disable global server + log_subsection "Global server disabled" + + set_sysctl net.ipv4.tcp_l3mdev_accept=0 + + # + # server tests + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + show_hint "Should fail 'Connection refused' since global server with VRF is disabled" + run_cmd nettest -s & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 1 "Global server" + + log_start + run_cmd nettest -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 0 "VRF server" + + log_start + run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 0 "Device server" + + # verify TCP reset received + log_start + show_hint "Should fail 'Connection refused' since there is no server" + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # local address tests + # (${VRF_IP} and 127.0.0.1 both timeout) + a=${NSA_IP} + log_start + show_hint "Should fail 'Connection refused' since global server with VRF is disabled" + run_cmd nettest -s & + sleep 1 + run_cmd nettest -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "Global server, local connection" + + # run MD5 tests + ipv4_tcp_md5 + + # + # enable VRF global server + # + log_subsection "VRF Global server enabled" + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + + for a in ${NSA_IP} ${VRF_IP} + do + log_start + show_hint "client socket should be bound to VRF" + run_cmd nettest -s -2 ${VRF} & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 0 "Global server" + + log_start + show_hint "client socket should be bound to VRF" + run_cmd nettest -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 
0 "VRF server" + + # verify TCP reset received + log_start + show_hint "Should fail 'Connection refused'" + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + a=${NSA_IP} + log_start + show_hint "client socket should be bound to device" + run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 0 "Device server" + + # local address tests + for a in ${NSA_IP} ${VRF_IP} + do + log_start + show_hint "Should fail 'Connection refused' since client is not bound to VRF" + run_cmd nettest -s -d ${VRF} & + sleep 1 + run_cmd nettest -r ${a} + log_test_addr ${a} $? 1 "Global server, local connection" + done + + # + # client + # + for a in ${NSB_IP} ${NSB_LO_IP} + do + log_start + run_cmd_nsb nettest -s & + sleep 1 + run_cmd nettest -r ${a} -d ${VRF} + log_test_addr ${a} $? 0 "Client, VRF bind" + + log_start + run_cmd_nsb nettest -s & + sleep 1 + run_cmd nettest -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 0 "Client, device bind" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -r ${a} -d ${VRF} + log_test_addr ${a} $? 1 "No server, VRF client" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "No server, device client" + done + + for a in ${NSA_IP} ${VRF_IP} 127.0.0.1 + do + log_start + run_cmd nettest -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd nettest -r ${a} -d ${VRF} -0 ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} + log_test_addr ${a} $? 0 "VRF server, device client, local connection" + + log_start + show_hint "Should fail 'No route to host' since client is out of VRF scope" + run_cmd nettest -s -d ${VRF} & + sleep 1 + run_cmd nettest -r ${a} + log_test_addr ${a} $? 
1 "VRF server, unbound client, local connection" + + log_start + run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -r ${a} -d ${VRF} -0 ${a} + log_test_addr ${a} $? 0 "Device server, VRF client, local connection" + + log_start + run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} + log_test_addr ${a} $? 0 "Device server, device client, local connection" +} + +ipv4_tcp() +{ + log_section "IPv4/TCP" + log_subsection "No VRF" + setup + + # tcp_l3mdev_accept should have no effect without VRF; + # run tests with it enabled and disabled to verify + log_subsection "tcp_l3mdev_accept disabled" + set_sysctl net.ipv4.tcp_l3mdev_accept=0 + ipv4_tcp_novrf + log_subsection "tcp_l3mdev_accept enabled" + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + ipv4_tcp_novrf + + log_subsection "With VRF" + setup "yes" + ipv4_tcp_vrf +} + +################################################################################ +# IPv4 UDP + +ipv4_udp_novrf() +{ + local a + + # + # server tests + # + for a in ${NSA_IP} ${NSA_LO_IP} + do + log_start + run_cmd nettest -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 0 "Global server" + + log_start + show_hint "Should fail 'Connection refused' since there is no server" + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + a=${NSA_IP} + log_start + run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 0 "Device server" + + # + # client + # + for a in ${NSB_IP} ${NSB_LO_IP} + do + log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -0 ${NSA_IP} + log_test_addr ${a} $? 0 "Client" + + log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP} + log_test_addr ${a} $? 
0 "Client, device bind" + + log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP} + log_test_addr ${a} $? 0 "Client, device send via cmsg" + + log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} + log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -D -r ${a} + log_test_addr ${a} $? 1 "No server, unbound client" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -D -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "No server, device client" + done + + # + # local address tests + # + for a in ${NSA_IP} ${NSA_LO_IP} 127.0.0.1 + do + log_start + run_cmd nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -0 ${a} -1 ${a} + log_test_addr ${a} $? 0 "Global server, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -r ${a} + log_test_addr ${a} $? 0 "Device server, unbound client, local connection" + + for a in ${NSA_LO_IP} 127.0.0.1 + do + log_start + show_hint "Should fail 'Connection refused' since address is out of device scope" + run_cmd nettest -s -D -d ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -r ${a} + log_test_addr ${a} $? 1 "Device server, unbound client, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -D & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "Global server, device client, local connection" + + log_start + run_cmd nettest -s -D & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -C -r ${a} + log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" + + log_start + run_cmd nettest -s -D & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} + log_test_addr ${a} $? 
0 "Global server, device client via IP_UNICAST_IF, local connection" + + # IPv4 with device bind has really weird behavior - it overrides the + # fib lookup, generates an rtable and tries to send the packet. This + # causes failures for local traffic at different places + for a in ${NSA_LO_IP} 127.0.0.1 + do + log_start + show_hint "Should fail since addresses on loopback are out of device scope" + run_cmd nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 2 "Global server, device client, local connection" + + log_start + show_hint "Should fail since addresses on loopback are out of device scope" + run_cmd nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C + log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" + + log_start + show_hint "Should fail since addresses on loopback are out of device scope" + run_cmd nettest -D -s & + sleep 1 + run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S + log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} + log_test_addr ${a} $? 0 "Device server, device client, local conn" + + log_start + run_cmd nettest -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 2 "No server, device client, local conn" +} + +ipv4_udp_vrf() +{ + local a + + # disable global server + log_subsection "Global server disabled" + set_sysctl net.ipv4.udp_l3mdev_accept=0 + + # + # server tests + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + show_hint "Fails because ingress is in a VRF and global server is disabled" + run_cmd nettest -D -s & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 1 "Global server" + + log_start + run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 
0 "VRF server" + + log_start + run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server" + + log_start + show_hint "Should fail 'Connection refused' since there is no server" + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 1 "No server" + + log_start + show_hint "Should fail 'Connection refused' since global server is out of scope" + run_cmd nettest -D -s & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 1 "Global server, VRF client, local connection" + done + + a=${NSA_IP} + log_start + run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" + + log_start + run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" + + a=${NSA_IP} + log_start + run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" + + log_start + run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" + + # enable global server + log_subsection "Global server enabled" + set_sysctl net.ipv4.udp_l3mdev_accept=1 + + # + # server tests + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 0 "Global server" + + log_start + run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 0 "VRF server" + + log_start + run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 
0 "Enslaved device server" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd_nsb nettest -D -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # + # client tests + # + log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP} + log_test $? 0 "VRF client" + + log_start + run_cmd_nsb nettest -D -s & + sleep 1 + run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP} + log_test $? 0 "Enslaved device client" + + # negative test - should fail + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -D -d ${VRF} -r ${NSB_IP} + log_test $? 1 "No server, VRF client" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -D -d ${NSA_DEV} -r ${NSB_IP} + log_test $? 1 "No server, enslaved device client" + + # + # local address tests + # + a=${NSA_IP} + log_start + run_cmd nettest -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Global server, VRF client, local conn" + + log_start + run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" + + log_start + run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "VRF server, device client, local conn" + + log_start + run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" + + log_start + run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" + + for a in ${VRF_IP} 127.0.0.1 + do + log_start + run_cmd nettest -D -s -2 ${VRF} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 
0 "Global server, VRF client, local conn" + done + + for a in ${VRF_IP} 127.0.0.1 + do + log_start + run_cmd nettest -s -D -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" + done + + # negative test - should fail + # verifies ECONNREFUSED + for a in ${NSA_IP} ${VRF_IP} 127.0.0.1 + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 1 "No server, VRF client, local conn" + done +} + +ipv4_udp() +{ + log_section "IPv4/UDP" + log_subsection "No VRF" + + setup + + # udp_l3mdev_accept should have no effect without VRF; + # run tests with it enabled and disabled to verify + log_subsection "udp_l3mdev_accept disabled" + set_sysctl net.ipv4.udp_l3mdev_accept=0 + ipv4_udp_novrf + log_subsection "udp_l3mdev_accept enabled" + set_sysctl net.ipv4.udp_l3mdev_accept=1 + ipv4_udp_novrf + + log_subsection "With VRF" + setup "yes" + ipv4_udp_vrf +} + +################################################################################ +# IPv4 address bind +# +# verifies ability or inability to bind to an address / device + +ipv4_addr_bind_novrf() +{ + # + # raw socket + # + for a in ${NSA_IP} ${NSA_LO_IP} + do + log_start + run_cmd nettest -s -R -P icmp -l ${a} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address" + + log_start + run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" + done + + # + # tcp sockets + # + a=${NSA_IP} + log_start + run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address" + + log_start + run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. 
The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not + #a=${NSA_LO_IP} + #log_start + #show_hint "Should fail with 'Cannot assign requested address'" + #run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + #log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" +} + +ipv4_addr_bind_vrf() +{ + # + # raw socket + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest -s -R -P icmp -l ${a} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address" + + log_start + run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" + log_start + run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind" + done + + a=${NSA_LO_IP} + log_start + show_hint "Address on loopback is out of VRF scope" + run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b + log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" + + # + # tcp sockets + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address" + + log_start + run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + done + + a=${NSA_LO_IP} + log_start + show_hint "Address on loopback out of scope for VRF" + run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b + log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" + + log_start + show_hint "Address on loopback out of scope for device in VRF" + run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 
1 "TCP socket bind to invalid local address for device bind" +} + +ipv4_addr_bind() +{ + log_section "IPv4 address binds" + + log_subsection "No VRF" + setup + ipv4_addr_bind_novrf + + log_subsection "With VRF" + setup "yes" + ipv4_addr_bind_vrf +} + +################################################################################ +# IPv4 runtime tests + +ipv4_rt() +{ + local desc="$1" + local varg="$2" + local with_vrf="yes" + local a + + # + # server tests + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest ${varg} -s & + sleep 1 + run_cmd_nsb nettest ${varg} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, global server" + + setup ${with_vrf} + done + + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest ${varg} -s -d ${VRF} & + sleep 1 + run_cmd_nsb nettest ${varg} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF server" + + setup ${with_vrf} + done + + a=${NSA_IP} + log_start + run_cmd nettest ${varg} -s -d ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest ${varg} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, enslaved device server" + + setup ${with_vrf} + + # + # client test + # + log_start + run_cmd_nsb nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF client" + + setup ${with_vrf} + + log_start + run_cmd_nsb nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, enslaved device client" + + setup ${with_vrf} + + # + # local address tests + # + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${VRF} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, global 
server, VRF client, local" + + setup ${with_vrf} + done + + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest ${varg} -d ${VRF} -s & + sleep 1 + run_cmd nettest ${varg} -d ${VRF} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF server and client, local" + + setup ${with_vrf} + done + + a=${NSA_IP} + log_start + run_cmd nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, global server, enslaved device client, local" + + setup ${with_vrf} + + log_start + run_cmd nettest ${varg} -d ${VRF} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF server, enslaved device client, local" + + setup ${with_vrf} + + log_start + run_cmd nettest ${varg} -d ${NSA_DEV} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, enslaved device server and client, local" +} + +ipv4_ping_rt() +{ + local with_vrf="yes" + local a + + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd_nsb ping -f ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "Device delete with active traffic - ping in" + + setup ${with_vrf} + done + + a=${NSB_IP} + log_start + run_cmd ping -f -I ${VRF} ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "Device delete with active traffic - ping out" +} + +ipv4_runtime() +{ + log_section "Run time tests - ipv4" + + setup "yes" + ipv4_ping_rt + + setup "yes" + ipv4_rt "TCP active socket" "-n -1" + + setup "yes" + ipv4_rt "TCP passive socket" "-i" +} + +################################################################################ +# IPv6 + +ipv6_ping_novrf() +{ + local a + + # should not have an impact, but make a known state + set_sysctl 
net.ipv4.raw_l3mdev_accept=0 2>/dev/null + + # + # out + # + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} + do + log_start + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping out" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} + do + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping out, device bind" + + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_LO_IP6} ${a} + log_test_addr ${a} $? 0 "ping out, loopback address bind" + done + + # + # in + # + for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV} ${MCAST}%${NSB_DEV} + do + log_start + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping in" + done + + # + # local traffic, local address + # + for a in ${NSA_IP6} ${NSA_LO_IP6} ::1 ${NSA_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} + do + log_start + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping local, no bind" + done + + for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} + do + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping local, device bind" + done + + for a in ${NSA_LO_IP6} ::1 + do + log_start + show_hint "Fails since address on loopback is out of device scope" + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping local, device bind" + done + + # + # ip rule blocks address + # + log_start + setup_cmd ip -6 rule add pref 32765 from all lookup local + setup_cmd ip -6 rule del pref 0 from all lookup local + setup_cmd ip -6 rule add pref 50 to ${NSB_LO_IP6} prohibit + setup_cmd ip -6 rule add pref 51 from ${NSB_IP6} prohibit + + a=${NSB_LO_IP6} + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, blocked by rule" + + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 
2 "ping out, device bind, blocked by rule" + + a=${NSA_LO_IP6} + log_start + show_hint "Response lost due to ip rule" + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, blocked by rule" + + setup_cmd ip -6 rule add pref 0 from all lookup local + setup_cmd ip -6 rule del pref 32765 from all lookup local + setup_cmd ip -6 rule del pref 50 to ${NSB_LO_IP6} prohibit + setup_cmd ip -6 rule del pref 51 from ${NSB_IP6} prohibit + + # + # route blocks reachability to remote address + # + log_start + setup_cmd ip -6 route del ${NSB_LO_IP6} + setup_cmd ip -6 route add unreachable ${NSB_LO_IP6} metric 10 + setup_cmd ip -6 route add unreachable ${NSB_IP6} metric 10 + + a=${NSB_LO_IP6} + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, blocked by route" + + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping out, device bind, blocked by route" + + a=${NSA_LO_IP6} + log_start + show_hint "Response lost due to ip route" + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, blocked by route" + + + # + # remove 'remote' routes; fallback to default + # + log_start + setup_cmd ip -6 ro del unreachable ${NSB_LO_IP6} + setup_cmd ip -6 ro del unreachable ${NSB_IP6} + + a=${NSB_LO_IP6} + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, unreachable route" + + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping out, device bind, unreachable route" +} + +ipv6_ping_vrf() +{ + local a + + # should default on; does not exist on older kernels + set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null + + # + # out + # + for a in ${NSB_IP6} ${NSB_LO_IP6} + do + log_start + run_cmd ${ping6} -c1 -w1 -I ${VRF} ${a} + log_test_addr ${a} $? 
0 "ping out, VRF bind" + done + + for a in ${NSB_LINKIP6}%${VRF} ${MCAST}%${VRF} + do + log_start + show_hint "Fails since VRF device does not support linklocal or multicast" + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, VRF bind" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} + do + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping out, device bind" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} + do + log_start + run_cmd ip vrf exec ${VRF} ${ping6} -c1 -w1 -I ${VRF_IP6} ${a} + log_test_addr ${a} $? 0 "ping out, vrf device+address bind" + done + + # + # in + # + for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} ${MCAST}%${NSB_DEV} + do + log_start + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 0 "ping in" + done + + a=${NSA_LO_IP6} + log_start + show_hint "Fails since loopback address is out of VRF scope" + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in" + + # + # local traffic, local address + # + for a in ${NSA_IP6} ${VRF_IP6} ::1 + do + log_start + show_hint "Source address should be ${a}" + run_cmd ${ping6} -c1 -w1 -I ${VRF} ${a} + log_test_addr ${a} $? 0 "ping local, VRF bind" + done + + for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} + do + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 0 "ping local, device bind" + done + + # LLA to GUA - remove ipv6 global addresses from ns-B + setup_cmd_nsb ip -6 addr del ${NSB_IP6}/64 dev ${NSB_DEV} + setup_cmd_nsb ip -6 addr del ${NSB_LO_IP6}/128 dev lo + setup_cmd_nsb ip -6 ro add ${NSA_IP6}/128 via ${NSA_LINKIP6} dev ${NSB_DEV} + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd_nsb ${ping6} -c1 -w1 ${NSA_IP6} + log_test_addr ${a} $? 
0 "ping in, LLA to GUA" + done + + setup_cmd_nsb ip -6 ro del ${NSA_IP6}/128 via ${NSA_LINKIP6} dev ${NSB_DEV} + setup_cmd_nsb ip -6 addr add ${NSB_IP6}/64 dev ${NSB_DEV} + setup_cmd_nsb ip -6 addr add ${NSB_LO_IP6}/128 dev lo + + # + # ip rule blocks address + # + log_start + setup_cmd ip -6 rule add pref 50 to ${NSB_LO_IP6} prohibit + setup_cmd ip -6 rule add pref 51 from ${NSB_IP6} prohibit + + a=${NSB_LO_IP6} + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, blocked by rule" + + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping out, device bind, blocked by rule" + + a=${NSA_LO_IP6} + log_start + show_hint "Response lost due to ip rule" + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 1 "ping in, blocked by rule" + + log_start + setup_cmd ip -6 rule del pref 50 to ${NSB_LO_IP6} prohibit + setup_cmd ip -6 rule del pref 51 from ${NSB_IP6} prohibit + + # + # remove 'remote' routes; fallback to default + # + log_start + setup_cmd ip -6 ro del ${NSB_LO_IP6} vrf ${VRF} + + a=${NSB_LO_IP6} + run_cmd ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping out, unreachable route" + + log_start + run_cmd ${ping6} -c1 -w1 -I ${NSA_DEV} ${a} + log_test_addr ${a} $? 2 "ping out, device bind, unreachable route" + + ip -netns ${NSB} -6 ro del ${NSA_LO_IP6} + a=${NSA_LO_IP6} + log_start + run_cmd_nsb ${ping6} -c1 -w1 ${a} + log_test_addr ${a} $? 2 "ping in, unreachable route" +} + +ipv6_ping() +{ + log_section "IPv6 ping" + + log_subsection "No VRF" + setup + ipv6_ping_novrf + + log_subsection "With VRF" + setup "yes" + ipv6_ping_vrf +} + +################################################################################ +# IPv6 TCP + +# +# MD5 tests without VRF +# +ipv6_tcp_md5_novrf() +{ + # + # single address + # + + # basic use case + log_start + run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 
0 "MD5: Single address config" + + # client sends MD5, server not configured + log_start + show_hint "Should timeout due to MD5 mismatch" + run_cmd nettest -6 -s & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: Server no config, client uses password" + + # wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: Client uses wrong password" + + # client from different address + log_start + show_hint "Should timeout due to MD5 mismatch" + run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: Client address does not match address configured with password" + + # + # MD5 extension - prefix length + # + + # client in prefix + log_start + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 0 "MD5: Prefix config" + + # client in prefix, wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: Prefix config, client uses wrong password" + + # client outside of prefix + log_start + show_hint "Should timeout due to MD5 mismatch" + run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: Prefix config, client address not in configured prefix" +} + +# +# MD5 tests with VRF +# +ipv6_tcp_md5() +{ + # + # single address + # + + # basic use case + log_start + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 
0 "MD5: VRF: Single address config" + + # client sends MD5, server not configured + log_start + show_hint "Should timeout since server does not have MD5 auth" + run_cmd nettest -6 -s -d ${VRF} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Server no config, client uses password" + + # wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Client uses wrong password" + + # client from different address + log_start + show_hint "Should timeout since server config differs from client" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Client address does not match address configured with password" + + # + # MD5 extension - prefix length + # + + # client in prefix + log_start + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 0 "MD5: VRF: Prefix config" + + # client in prefix, wrong password + log_start + show_hint "Should timeout since client uses wrong password" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" + + # client outside of prefix + log_start + show_hint "Should timeout since client address is outside of prefix" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 
2 "MD5: VRF: Prefix config, client address not in configured prefix" + + # + # duplicate config between default VRF and a VRF + # + + log_start + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" + + log_start + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" + + log_start + show_hint "Should timeout since client in default VRF uses VRF password" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" + + log_start + show_hint "Should timeout since client in VRF uses default VRF password" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" + + log_start + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" + + log_start + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 
0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" + + log_start + show_hint "Should timeout since client in default VRF uses VRF password" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW} + log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" + + log_start + show_hint "Should timeout since client in VRF uses default VRF password" + run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} & + run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & + sleep 1 + run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW} + log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" + + # + # negative tests + # + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6} + log_test $? 1 "MD5: VRF: Device must be a VRF - single address" + + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6} + log_test $? 1 "MD5: VRF: Device must be a VRF - prefix" + +} + +ipv6_tcp_novrf() +{ + local a + + # + # server tests + # + for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + run_cmd nettest -6 -s & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "Global server" + done + + # verify TCP reset received + for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # + # client + # + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} + do + log_start + run_cmd_nsb nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} + log_test_addr ${a} $? 
0 "Client" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} + do + log_start + run_cmd_nsb nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 0 "Client, device bind" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "No server, device client" + done + + # + # local address tests + # + for a in ${NSA_IP6} ${NSA_LO_IP6} ::1 + do + log_start + run_cmd nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} + log_test_addr ${a} $? 0 "Global server, local connection" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -r ${a} -0 ${a} + log_test_addr ${a} $? 0 "Device server, unbound client, local connection" + + for a in ${NSA_LO_IP6} ::1 + do + log_start + show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" + run_cmd nettest -6 -s -d ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -r ${a} + log_test_addr ${a} $? 1 "Device server, unbound client, local connection" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} + log_test_addr ${a} $? 0 "Global server, device client, local connection" + + for a in ${NSA_LO_IP6} ::1 + do + log_start + show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" + run_cmd nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "Global server, device client, local connection" + done + + for a in ${NSA_IP6} ${NSA_LINKIP6} + do + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 
0 "Device server, device client, local conn" + done + + for a in ${NSA_IP6} ${NSA_LINKIP6} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 1 "No server, device client, local conn" + done + + ipv6_tcp_md5_novrf +} + +ipv6_tcp_vrf() +{ + local a + + # disable global server + log_subsection "Global server disabled" + + set_sysctl net.ipv4.tcp_l3mdev_accept=0 + + # + # server tests + # + for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + show_hint "Should fail 'Connection refused' since global server with VRF is disabled" + run_cmd nettest -6 -s & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 1 "Global server" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "VRF server" + done + + # link local is always bound to ingress device + a=${NSA_LINKIP6}%${NSB_DEV} + log_start + run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "VRF server" + + for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "Device server" + done + + # verify TCP reset received + for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # local address tests + a=${NSA_IP6} + log_start + show_hint "Should fail 'Connection refused' since global server with VRF is disabled" + run_cmd nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 
1 "Global server, local connection" + + # run MD5 tests + ipv6_tcp_md5 + + # + # enable VRF global server + # + log_subsection "VRF Global server enabled" + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s -2 ${VRF} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "Global server" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "VRF server" + done + + # For LLA, child socket is bound to device + a=${NSA_LINKIP6}%${NSB_DEV} + log_start + run_cmd nettest -6 -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "Global server" + + log_start + run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "VRF server" + + for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 0 "Device server" + done + + # verify TCP reset received + for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # local address tests + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + show_hint "Fails 'Connection refused' since client is not in VRF" + run_cmd nettest -6 -s -d ${VRF} & + sleep 1 + run_cmd nettest -6 -r ${a} + log_test_addr ${a} $? 1 "Global server, local connection" + done + + + # + # client + # + for a in ${NSB_IP6} ${NSB_LO_IP6} + do + log_start + run_cmd_nsb nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${VRF} + log_test_addr ${a} $? 
0 "Client, VRF bind" + done + + a=${NSB_LINKIP6} + log_start + show_hint "Fails since VRF device does not allow linklocal addresses" + run_cmd_nsb nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${VRF} + log_test_addr ${a} $? 1 "Client, VRF bind" + + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6} + do + log_start + run_cmd_nsb nettest -6 -s & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 0 "Client, device bind" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -r ${a} -d ${VRF} + log_test_addr ${a} $? 1 "No server, VRF client" + done + + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6} + do + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "No server, device client" + done + + for a in ${NSA_IP6} ${VRF_IP6} ::1 + do + log_start + run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} + log_test_addr ${a} $? 0 "VRF server, device client, local connection" + + a=${NSA_IP6} + log_start + show_hint "Should fail since unbound client is out of VRF scope" + run_cmd nettest -6 -s -d ${VRF} & + sleep 1 + run_cmd nettest -6 -r ${a} + log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" + + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} + log_test_addr ${a} $? 0 "Device server, VRF client, local connection" + + for a in ${NSA_IP6} ${NSA_LINKIP6} + do + log_start + run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} + log_test_addr ${a} $? 
0 "Device server, device client, local connection" + done +} + +ipv6_tcp() +{ + log_section "IPv6/TCP" + log_subsection "No VRF" + setup + + # tcp_l3mdev_accept should have no affect without VRF; + # run tests with it enabled and disabled to verify + log_subsection "tcp_l3mdev_accept disabled" + set_sysctl net.ipv4.tcp_l3mdev_accept=0 + ipv6_tcp_novrf + log_subsection "tcp_l3mdev_accept enabled" + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + ipv6_tcp_novrf + + log_subsection "With VRF" + setup "yes" + ipv6_tcp_vrf +} + +################################################################################ +# IPv6 UDP + +ipv6_udp_novrf() +{ + local a + + # + # server tests + # + for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Global server" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Device server" + done + + a=${NSA_LO_IP6} + log_start + run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Global server" + + # should fail since loopback address is out of scope for a device + # bound server, but it does not - hence this is more documenting + # behavior. + #log_start + #show_hint "Should fail since loopback address is out of scope" + #run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + #sleep 1 + #run_cmd_nsb nettest -6 -D -r ${a} + #log_test_addr ${a} $? 1 "Device server" + + # negative test - should fail + for a in ${NSA_IP6} ${NSA_LO_IP6} ${NSA_LINKIP6}%${NSB_DEV} + do + log_start + show_hint "Should fail 'Connection refused' since there is no server" + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 
1 "No server" + done + + # + # client + # + for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} + do + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6} + log_test_addr ${a} $? 0 "Client" + + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6} + log_test_addr ${a} $? 0 "Client, device bind" + + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6} + log_test_addr ${a} $? 0 "Client, device send via cmsg" + + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6} + log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -D -r ${a} + log_test_addr ${a} $? 1 "No server, unbound client" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "No server, device client" + done + + # + # local address tests + # + for a in ${NSA_IP6} ${NSA_LO_IP6} ::1 + do + log_start + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a} + log_test_addr ${a} $? 0 "Global server, local connection" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Device server, unbound client, local connection" + + for a in ${NSA_LO_IP6} ::1 + do + log_start + show_hint "Should fail 'Connection refused' since address is out of device scope" + run_cmd nettest -6 -s -D -d ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -r ${a} + log_test_addr ${a} $? 1 "Device server, local connection" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s -D & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 
0 "Global server, device client, local connection" + + log_start + run_cmd nettest -6 -s -D & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a} + log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" + + log_start + run_cmd nettest -6 -s -D & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a} + log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection" + + for a in ${NSA_LO_IP6} ::1 + do + log_start + show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} + log_test_addr ${a} $? 1 "Global server, device client, local connection" + + log_start + show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C + log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" + + log_start + show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S + log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} + log_test_addr ${a} $? 0 "Device server, device client, local conn" + + log_start + show_hint "Should fail 'Connection refused'" + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 1 "No server, device client, local conn" + + # LLA to GUA + run_cmd_nsb ip -6 addr del ${NSB_IP6}/64 dev ${NSB_DEV} + run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} + log_start + run_cmd nettest -6 -s -D & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${NSA_IP6} + log_test $? 
0 "UDP in - LLA to GUA" + + run_cmd_nsb ip -6 ro del ${NSA_IP6}/128 dev ${NSB_DEV} + run_cmd_nsb ip -6 addr add ${NSB_IP6}/64 dev ${NSB_DEV} nodad +} + +ipv6_udp_vrf() +{ + local a + + # disable global server + log_subsection "Global server disabled" + set_sysctl net.ipv4.udp_l3mdev_accept=0 + + # + # server tests + # + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + show_hint "Should fail 'Connection refused' since global server is disabled" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 1 "Global server" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "VRF server" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server" + done + + # negative test - should fail + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + show_hint "Should fail 'Connection refused' since there is no server" + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # + # local address tests + # + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + show_hint "Should fail 'Connection refused' since global server is disabled" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 1 "Global server, VRF client, local conn" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -d ${VRF} -s & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" + done + + a=${NSA_IP6} + log_start + show_hint "Should fail 'Connection refused' since global server is disabled" + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 
1 "Global server, device client, local conn" + + log_start + run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "VRF server, device client, local conn" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" + + # disable global server + log_subsection "Global server enabled" + set_sysctl net.ipv4.udp_l3mdev_accept=1 + + # + # server tests + # + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Global server" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "VRF server" + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 0 "Enslaved device server" + done + + # negative test - should fail + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd_nsb nettest -6 -D -r ${a} + log_test_addr ${a} $? 1 "No server" + done + + # + # client tests + # + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} + log_test $? 0 "VRF client" + + # negative test - should fail + log_start + run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} + log_test $? 1 "No server, VRF client" + + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} + log_test $? 
0 "Enslaved device client" + + # negative test - should fail + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} + log_test $? 1 "No server, enslaved device client" + + # + # local address tests + # + a=${NSA_IP6} + log_start + run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Global server, VRF client, local conn" + + #log_start + run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" + + + a=${VRF_IP6} + log_start + run_cmd nettest -6 -D -s -2 ${VRF} & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Global server, VRF client, local conn" + + log_start + run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" + + # negative test - should fail + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 1 "No server, VRF client, local conn" + done + + # device to global IP + a=${NSA_IP6} + log_start + run_cmd nettest -6 -D -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "Global server, device client, local conn" + + log_start + run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 0 "VRF server, device client, local conn" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${VRF} -r ${a} + log_test_addr ${a} $? 0 "Device server, VRF client, local conn" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 
0 "Device server, device client, local conn" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} + log_test_addr ${a} $? 1 "No server, device client, local conn" + + + # link local addresses + log_start + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} + log_test $? 0 "Global server, linklocal IP" + + log_start + run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} + log_test $? 1 "No server, linklocal IP" + + + log_start + run_cmd_nsb nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} + log_test $? 0 "Enslaved device client, linklocal IP" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} + log_test $? 1 "No server, device client, peer linklocal IP" + + + log_start + run_cmd nettest -6 -D -s & + sleep 1 + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} + log_test $? 0 "Enslaved device client, local conn - linklocal IP" + + log_start + run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} + log_test $? 1 "No server, device client, local conn - linklocal IP" + + # LLA to GUA + run_cmd_nsb ip -6 addr del ${NSB_IP6}/64 dev ${NSB_DEV} + run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} + log_start + run_cmd nettest -6 -s -D & + sleep 1 + run_cmd_nsb nettest -6 -D -r ${NSA_IP6} + log_test $? 
0 "UDP in - LLA to GUA" + + run_cmd_nsb ip -6 ro del ${NSA_IP6}/128 dev ${NSB_DEV} + run_cmd_nsb ip -6 addr add ${NSB_IP6}/64 dev ${NSB_DEV} nodad +} + +ipv6_udp() +{ + # should not matter, but set to known state + set_sysctl net.ipv4.udp_early_demux=1 + + log_section "IPv6/UDP" + log_subsection "No VRF" + setup + + # udp_l3mdev_accept should have no affect without VRF; + # run tests with it enabled and disabled to verify + log_subsection "udp_l3mdev_accept disabled" + set_sysctl net.ipv4.udp_l3mdev_accept=0 + ipv6_udp_novrf + log_subsection "udp_l3mdev_accept enabled" + set_sysctl net.ipv4.udp_l3mdev_accept=1 + ipv6_udp_novrf + + log_subsection "With VRF" + setup "yes" + ipv6_udp_vrf +} + +################################################################################ +# IPv6 address bind + +ipv6_addr_bind_novrf() +{ + # + # raw socket + # + for a in ${NSA_IP6} ${NSA_LO_IP6} + do + log_start + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address" + + log_start + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" + done + + # + # tcp sockets + # + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s -l ${a} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address" + + log_start + run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + + a=${NSA_LO_IP6} + log_start + show_hint "Should fail with 'Cannot assign requested address'" + run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" +} + +ipv6_addr_bind_vrf() +{ + # + # raw socket + # + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + log_test_addr ${a} $? 
0 "Raw socket bind to local address after vrf bind" + + log_start + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b + log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind" + done + + a=${NSA_LO_IP6} + log_start + show_hint "Address on loopback is out of VRF scope" + run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b + log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind" + + # + # tcp sockets + # + # address on enslaved device is valid for the VRF or device in a VRF + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind" + done + + a=${NSA_IP6} + log_start + run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + + a=${VRF_IP6} + log_start + run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + + a=${NSA_LO_IP6} + log_start + show_hint "Address on loopback out of scope for VRF" + run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b + log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF" + + log_start + show_hint "Address on loopback out of scope for device in VRF" + run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 
1 "TCP socket bind to invalid local address for device bind" + +} + +ipv6_addr_bind() +{ + log_section "IPv6 address binds" + + log_subsection "No VRF" + setup + ipv6_addr_bind_novrf + + log_subsection "With VRF" + setup "yes" + ipv6_addr_bind_vrf +} + +################################################################################ +# IPv6 runtime tests + +ipv6_rt() +{ + local desc="$1" + local varg="-6 $2" + local with_vrf="yes" + local a + + # + # server tests + # + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest ${varg} -s & + sleep 1 + run_cmd_nsb nettest ${varg} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, global server" + + setup ${with_vrf} + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest ${varg} -d ${VRF} -s & + sleep 1 + run_cmd_nsb nettest ${varg} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF server" + + setup ${with_vrf} + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest ${varg} -d ${NSA_DEV} -s & + sleep 1 + run_cmd_nsb nettest ${varg} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, enslaved device server" + + setup ${with_vrf} + done + + # + # client test + # + log_start + run_cmd_nsb nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test 0 0 "${desc}, VRF client" + + setup ${with_vrf} + + log_start + run_cmd_nsb nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test 0 0 "${desc}, enslaved device client" + + setup ${with_vrf} + + + # + # local address tests + # + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${VRF} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr 
${a} 0 0 "${desc}, global server, VRF client" + + setup ${with_vrf} + done + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest ${varg} -d ${VRF} -s & + sleep 1 + run_cmd nettest ${varg} -d ${VRF} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF server and client" + + setup ${with_vrf} + done + + a=${NSA_IP6} + log_start + run_cmd nettest ${varg} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, global server, device client" + + setup ${with_vrf} + + log_start + run_cmd nettest ${varg} -d ${VRF} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, VRF server, device client" + + setup ${with_vrf} + + log_start + run_cmd nettest ${varg} -d ${NSA_DEV} -s & + sleep 1 + run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "${desc}, device server, device client" +} + +ipv6_ping_rt() +{ + local with_vrf="yes" + local a + + a=${NSA_IP6} + log_start + run_cmd_nsb ${ping6} -f ${a} & + sleep 3 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "Device delete with active traffic - ping in" + + setup ${with_vrf} + + log_start + run_cmd ${ping6} -f ${NSB_IP6} -I ${VRF} & + sleep 1 + run_cmd ip link del ${VRF} + sleep 1 + log_test_addr ${a} 0 0 "Device delete with active traffic - ping out" +} + +ipv6_runtime() +{ + log_section "Run time tests - ipv6" + + setup "yes" + ipv6_ping_rt + + setup "yes" + ipv6_rt "TCP active socket" "-n -1" + + setup "yes" + ipv6_rt "TCP passive socket" "-i" + + setup "yes" + ipv6_rt "UDP active socket" "-D -n -1" +} + +################################################################################ +# netfilter blocking connections + +netfilter_tcp_reset() +{ + local a + + for a in ${NSA_IP} ${VRF_IP} + do + 
log_start + run_cmd nettest -s & + sleep 1 + run_cmd_nsb nettest -r ${a} + log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" + done +} + +netfilter_icmp() +{ + local stype="$1" + local arg + local a + + [ "${stype}" = "UDP" ] && arg="-D" + + for a in ${NSA_IP} ${VRF_IP} + do + log_start + run_cmd nettest ${arg} -s & + sleep 1 + run_cmd_nsb nettest ${arg} -r ${a} + log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" + done +} + +ipv4_netfilter() +{ + log_section "IPv4 Netfilter" + log_subsection "TCP reset" + + setup "yes" + run_cmd iptables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with tcp-reset + + netfilter_tcp_reset + + log_start + log_subsection "ICMP unreachable" + + log_start + run_cmd iptables -F + run_cmd iptables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with icmp-port-unreachable + run_cmd iptables -A INPUT -p udp --dport 12345 -j REJECT --reject-with icmp-port-unreachable + + netfilter_icmp "TCP" + netfilter_icmp "UDP" + + log_start + iptables -F +} + +netfilter_tcp6_reset() +{ + local a + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s & + sleep 1 + run_cmd_nsb nettest -6 -r ${a} + log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" + done +} + +netfilter_icmp6() +{ + local stype="$1" + local arg + local a + + [ "${stype}" = "UDP" ] && arg="$arg -D" + + for a in ${NSA_IP6} ${VRF_IP6} + do + log_start + run_cmd nettest -6 -s ${arg} & + sleep 1 + run_cmd_nsb nettest -6 ${arg} -r ${a} + log_test_addr ${a} $? 
1 "Global ${stype} server, Rx reject icmp-port-unreach" + done +} + +ipv6_netfilter() +{ + log_section "IPv6 Netfilter" + log_subsection "TCP reset" + + setup "yes" + run_cmd ip6tables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with tcp-reset + + netfilter_tcp6_reset + + log_subsection "ICMP unreachable" + + log_start + run_cmd ip6tables -F + run_cmd ip6tables -A INPUT -p tcp --dport 12345 -j REJECT --reject-with icmp6-port-unreachable + run_cmd ip6tables -A INPUT -p udp --dport 12345 -j REJECT --reject-with icmp6-port-unreachable + + netfilter_icmp6 "TCP" + netfilter_icmp6 "UDP" + + log_start + ip6tables -F +} + +################################################################################ +# specific use cases + +# VRF only. +# ns-A device enslaved to bridge. Verify traffic with and without +# br_netfilter module loaded. Repeat with SVI on bridge. +use_case_br() +{ + setup "yes" + + setup_cmd ip link set ${NSA_DEV} down + setup_cmd ip addr del dev ${NSA_DEV} ${NSA_IP}/24 + setup_cmd ip -6 addr del dev ${NSA_DEV} ${NSA_IP6}/64 + + setup_cmd ip link add br0 type bridge + setup_cmd ip addr add dev br0 ${NSA_IP}/24 + setup_cmd ip -6 addr add dev br0 ${NSA_IP6}/64 nodad + + setup_cmd ip li set ${NSA_DEV} master br0 + setup_cmd ip li set ${NSA_DEV} up + setup_cmd ip li set br0 up + setup_cmd ip li set br0 vrf ${VRF} + + rmmod br_netfilter 2>/dev/null + sleep 5 # DAD + + run_cmd ip neigh flush all + run_cmd ping -c1 -w1 -I br0 ${NSB_IP} + log_test $? 0 "Bridge into VRF - IPv4 ping out" + + run_cmd ip neigh flush all + run_cmd ${ping6} -c1 -w1 -I br0 ${NSB_IP6} + log_test $? 0 "Bridge into VRF - IPv6 ping out" + + run_cmd ip neigh flush all + run_cmd_nsb ping -c1 -w1 ${NSA_IP} + log_test $? 0 "Bridge into VRF - IPv4 ping in" + + run_cmd ip neigh flush all + run_cmd_nsb ${ping6} -c1 -w1 ${NSA_IP6} + log_test $? 0 "Bridge into VRF - IPv6 ping in" + + modprobe br_netfilter + if [ $? 
-eq 0 ]; then + run_cmd ip neigh flush all + run_cmd ping -c1 -w1 -I br0 ${NSB_IP} + log_test $? 0 "Bridge into VRF with br_netfilter - IPv4 ping out" + + run_cmd ip neigh flush all + run_cmd ${ping6} -c1 -w1 -I br0 ${NSB_IP6} + log_test $? 0 "Bridge into VRF with br_netfilter - IPv6 ping out" + + run_cmd ip neigh flush all + run_cmd_nsb ping -c1 -w1 ${NSA_IP} + log_test $? 0 "Bridge into VRF with br_netfilter - IPv4 ping in" + + run_cmd ip neigh flush all + run_cmd_nsb ${ping6} -c1 -w1 ${NSA_IP6} + log_test $? 0 "Bridge into VRF with br_netfilter - IPv6 ping in" + fi + + setup_cmd ip li set br0 nomaster + setup_cmd ip li add br0.100 link br0 type vlan id 100 + setup_cmd ip li set br0.100 vrf ${VRF} up + setup_cmd ip addr add dev br0.100 172.16.101.1/24 + setup_cmd ip -6 addr add dev br0.100 2001:db8:101::1/64 nodad + + setup_cmd_nsb ip li add vlan100 link ${NSB_DEV} type vlan id 100 + setup_cmd_nsb ip addr add dev vlan100 172.16.101.2/24 + setup_cmd_nsb ip -6 addr add dev vlan100 2001:db8:101::2/64 nodad + setup_cmd_nsb ip li set vlan100 up + sleep 1 + + rmmod br_netfilter 2>/dev/null + + run_cmd ip neigh flush all + run_cmd ping -c1 -w1 -I br0.100 172.16.101.2 + log_test $? 0 "Bridge vlan into VRF - IPv4 ping out" + + run_cmd ip neigh flush all + run_cmd ${ping6} -c1 -w1 -I br0.100 2001:db8:101::2 + log_test $? 0 "Bridge vlan into VRF - IPv6 ping out" + + run_cmd ip neigh flush all + run_cmd_nsb ping -c1 -w1 172.16.101.1 + log_test $? 0 "Bridge vlan into VRF - IPv4 ping in" + + run_cmd ip neigh flush all + run_cmd_nsb ${ping6} -c1 -w1 2001:db8:101::1 + log_test $? 0 "Bridge vlan into VRF - IPv6 ping in" + + modprobe br_netfilter + if [ $? -eq 0 ]; then + run_cmd ip neigh flush all + run_cmd ping -c1 -w1 -I br0.100 172.16.101.2 + log_test $? 0 "Bridge vlan into VRF with br_netfilter - IPv4 ping out" + + run_cmd ip neigh flush all + run_cmd ${ping6} -c1 -w1 -I br0.100 2001:db8:101::2 + log_test $? 
0 "Bridge vlan into VRF with br_netfilter - IPv6 ping out" + + run_cmd ip neigh flush all + run_cmd_nsb ping -c1 -w1 172.16.101.1 + log_test $? 0 "Bridge vlan into VRF - IPv4 ping in" + + run_cmd ip neigh flush all + run_cmd_nsb ${ping6} -c1 -w1 2001:db8:101::1 + log_test $? 0 "Bridge vlan into VRF - IPv6 ping in" + fi + + setup_cmd ip li del br0 2>/dev/null + setup_cmd_nsb ip li del vlan100 2>/dev/null +} + +use_cases() +{ + log_section "Use cases" + use_case_br +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -4 IPv4 tests only + -6 IPv6 tests only + -t <test> Test name/set to run + -p Pause on fail + -P Pause after each test + -v Be verbose +EOF +} + +################################################################################ +# main + +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_OTHER="use_cases" + +PAUSE_ON_FAIL=no +PAUSE=no + +while getopts :46t:pPvh o +do + case $o in + 4) TESTS=ipv4;; + 6) TESTS=ipv6;; + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=1;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# make sure we don't pause twice +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +# +# show user test config +# +if [ -z "$TESTS" ]; then + TESTS="$TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER" +elif [ "$TESTS" = "ipv4" ]; then + TESTS="$TESTS_IPV4" +elif [ "$TESTS" = "ipv6" ]; then + TESTS="$TESTS_IPV6" +fi + +which nettest >/dev/null +if [ $? 
-ne 0 ]; then + echo "'nettest' command not found; skipping tests" + exit 0 +fi + +declare -i nfail=0 +declare -i nsuccess=0 + +for t in $TESTS +do + case $t in + ipv4_ping|ping) ipv4_ping;; + ipv4_tcp|tcp) ipv4_tcp;; + ipv4_udp|udp) ipv4_udp;; + ipv4_bind|bind) ipv4_addr_bind;; + ipv4_runtime) ipv4_runtime;; + ipv4_netfilter) ipv4_netfilter;; + + ipv6_ping|ping6) ipv6_ping;; + ipv6_tcp|tcp6) ipv6_tcp;; + ipv6_udp|udp6) ipv6_udp;; + ipv6_bind|bind6) ipv6_addr_bind;; + ipv6_runtime) ipv6_runtime;; + ipv6_netfilter) ipv6_netfilter;; + + use_cases) use_cases;; + + # setup namespaces and config, but do not run any tests + setup) setup; exit 0;; + vrf_setup) setup "yes"; exit 0;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +cleanup 2>/dev/null + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh index e6828732843e..9dc35a16e415 100755 --- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh +++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh @@ -15,6 +15,8 @@ PAUSE_ON_FAIL=no VERBOSE=0 +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + ################################################################################ # helpers @@ -200,7 +202,7 @@ validate_v6_exception() local rc if [ ${ping_sz} != "0" ]; then - run_cmd ip netns exec h0 ping6 -s ${ping_sz} -c5 -w5 ${dst} + run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst} fi if [ "$VERBOSE" = "1" ]; then @@ -243,7 +245,7 @@ do run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1 [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1 - run_cmd taskset -c ${c} ip netns exec h0 ping6 -c1 -w1 2001:db8:10${i}::1 + run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1 [ $? 
-ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1 [ $ret -ne 0 ] && break diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index c5c93d5fb3ad..796670ebc65b 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -212,6 +212,8 @@ check_output() printf " ${out}\n" printf " Expected:\n" printf " ${expected}\n\n" + else + echo " WARNING: Unexpected route entry" fi fi @@ -274,7 +276,7 @@ ipv6_fcnal() run_cmd "$IP nexthop get id 52" log_test $? 0 "Get nexthop by id" - check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1" + check_nexthop "id 52" "id 52 via 2001:db8:91::2 dev veth1 scope link" run_cmd "$IP nexthop del id 52" log_test $? 0 "Delete nexthop by id" @@ -479,12 +481,12 @@ ipv6_fcnal_runtime() run_cmd "$IP -6 nexthop add id 85 dev veth1" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85" log_test $? 0 "IPv6 route with device only nexthop" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium" run_cmd "$IP nexthop add id 123 group 81/85" run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123" log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw" - check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 nexthop via 2001:db8:91::2 dev veth1 nexthop dev veth1" + check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium" # # IPv6 route with v4 nexthop - not allowed @@ -538,7 +540,7 @@ ipv4_fcnal() run_cmd "$IP nexthop get id 12" log_test $? 0 "Get nexthop by id" - check_nexthop "id 12" "id 12 via 172.16.1.2 src 172.16.1.1 dev veth1 scope link" + check_nexthop "id 12" "id 12 via 172.16.1.2 dev veth1 scope link" run_cmd "$IP nexthop del id 12" log_test $? 
0 "Delete nexthop by id" @@ -685,7 +687,7 @@ ipv4_withv6_fcnal() set +e run_cmd "$IP ro add 172.16.101.1/32 nhid 11" log_test $? 0 "IPv6 nexthop with IPv4 route" - check_route "172.16.101.1" "172.16.101.1 nhid 11 via ${lladdr} dev veth1" + check_route "172.16.101.1" "172.16.101.1 nhid 11 via inet6 ${lladdr} dev veth1" set -e run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1" @@ -694,11 +696,11 @@ ipv4_withv6_fcnal() run_cmd "$IP ro replace 172.16.101.1/32 nhid 101" log_test $? 0 "IPv6 nexthop with IPv4 route" - check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1" log_test $? 0 "IPv4 route with IPv6 gateway" - check_route "172.16.101.1" "172.16.101.1 via ${lladdr} dev veth1" + check_route "172.16.101.1" "172.16.101.1 via inet6 ${lladdr} dev veth1" run_cmd "$IP ro replace 172.16.101.1/32 via inet6 2001:db8:50::1 dev veth1" log_test $? 2 "IPv4 route with invalid IPv6 gateway" @@ -785,10 +787,10 @@ ipv4_fcnal_runtime() log_test $? 0 "IPv4 route with device only nexthop" check_route "172.16.101.1" "172.16.101.1 nhid 85 dev veth1" - run_cmd "$IP nexthop add id 122 group 21/85" - run_cmd "$IP ro replace 172.16.101.1/32 nhid 122" + run_cmd "$IP nexthop add id 123 group 21/85" + run_cmd "$IP ro replace 172.16.101.1/32 nhid 123" log_test $? 0 "IPv4 multipath route with nexthop mix - dev only + gw" - check_route "172.16.101.1" "172.16.101.1 nhid 85 nexthop via 172.16.1.2 dev veth1 nexthop dev veth1" + check_route "172.16.101.1" "172.16.101.1 nhid 123 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop dev veth1 weight 1" # # IPv4 with IPv6 @@ -820,7 +822,7 @@ ipv4_fcnal_runtime() run_cmd "$IP ro replace 172.16.101.1/32 nhid 101" log_test $? 
0 "IPv4 route with mixed v4-v6 multipath route" - check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" + check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1" run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1" log_test $? 0 "IPv6 nexthop with IPv4 route" @@ -938,6 +940,20 @@ basic() run_cmd "$IP nexthop add id 104 group 1 dev veth1" log_test $? 2 "Nexthop group and device" + # Tests to ensure that flushing works as expected. + run_cmd "$IP nexthop add id 105 blackhole proto 99" + run_cmd "$IP nexthop add id 106 blackhole proto 100" + run_cmd "$IP nexthop add id 107 blackhole proto 99" + run_cmd "$IP nexthop flush proto 99" + check_nexthop "id 105" "" + check_nexthop "id 106" "id 106 blackhole proto 100" + check_nexthop "id 107" "" + run_cmd "$IP nexthop flush proto 100" + check_nexthop "id 106" "" + + run_cmd "$IP nexthop flush proto 100" + log_test $? 0 "Test proto flush" + run_cmd "$IP nexthop add id 104 group 1 blackhole" log_test $? 2 "Nexthop group and blackhole" diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 4465fc2dae14..6dd403103800 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,7 +9,7 @@ ret=0 ksft_skip=4 # all tests in this script. 
Can be overridden with -t option -TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter" +TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr" VERBOSE=0 PAUSE_ON_FAIL=no @@ -17,6 +17,8 @@ PAUSE=no IP="ip -netns ns1" NS_EXEC="ip netns exec ns1" +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + log_test() { local rc=$1 @@ -614,6 +616,20 @@ fib_nexthop_test() cleanup } +fib_suppress_test() +{ + $IP link add dummy1 type dummy + $IP link set dummy1 up + $IP -6 route add default dev dummy1 + $IP -6 rule add table main suppress_prefixlength 0 + ping -f -c 1000 -W 1 1234::1 || true + $IP -6 rule del table main suppress_prefixlength 0 + $IP link del dummy1 + + # If we got here without crashing, we're good. + return 0 +} + ################################################################################ # Tests on route add and replace @@ -1086,7 +1102,7 @@ ipv6_route_metrics_test() log_test $rc 0 "Multipath route with mtu metric" $IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300 - run_cmd "ip netns exec ns1 ping6 -w1 -c1 -s 1500 2001:db8:104::1" + run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1" log_test $? 0 "Using route with mtu metric" run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo" @@ -1422,6 +1438,27 @@ ipv4_addr_metric_test() fi log_test $rc 0 "Prefix route with metric on link up" + # explicitly check for metric changes on edge scenarios + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259" + run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260" + rc=$? 
+ fi + log_test $rc 0 "Modify metric of .0/24 address" + + run_cmd "$IP addr flush dev dummy2" + run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260" + run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 261" + rc=$? + if [ $rc -eq 0 ]; then + check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261" + rc=$? + fi + log_test $rc 0 "Modify metric of address with peer route" + $IP li del dummy1 $IP li del dummy2 cleanup @@ -1463,6 +1500,55 @@ ipv4_route_metrics_test() route_cleanup } +ipv4_del_addr_test() +{ + echo + echo "IPv4 delete address route tests" + + setup + + set -e + $IP li add dummy1 type dummy + $IP li set dummy1 up + $IP li add dummy2 type dummy + $IP li set dummy2 up + $IP li add red type vrf table 1111 + $IP li set red up + $IP ro add vrf red unreachable default + $IP li set dummy2 vrf red + + $IP addr add dev dummy1 172.16.104.1/24 + $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.1/24 + $IP addr add dev dummy2 172.16.104.11/24 + $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + set +e + + # removing address from device in vrf should only remove route from vrf table + $IP addr del dev dummy2 172.16.104.11/24 + $IP ro ls vrf red | grep -q 172.16.105.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.105.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.11/24 + $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + + $IP addr del dev dummy1 172.16.104.11/24 + $IP ro ls | grep -q 172.16.105.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.105.0/24 + log_test $? 
0 "Route in VRF is not removed by address delete" + + $IP li del dummy1 + $IP li del dummy2 + cleanup +} + + ipv4_route_v6_gw_test() { local rc @@ -1591,10 +1677,12 @@ do fib_carrier_test|carrier) fib_carrier_test;; fib_rp_filter_test|rp_filter) fib_rp_filter_test;; fib_nexthop_test|nexthop) fib_nexthop_test;; + fib_suppress_test|suppress) fib_suppress_test;; ipv6_route_test|ipv6_rt) ipv6_route_test;; ipv4_route_test|ipv4_rt) ipv4_route_test;; ipv6_addr_metric) ipv6_addr_metric_test;; ipv4_addr_metric) ipv4_addr_metric_test;; + ipv4_del_addr) ipv4_del_addr_test;; ipv6_route_metrics) ipv6_route_metrics_test;; ipv4_route_metrics) ipv4_route_metrics_test;; ipv4_route_v6_gw) ipv4_route_v6_gw_test;; diff --git a/tools/testing/selftests/net/fin_ack_lat.c b/tools/testing/selftests/net/fin_ack_lat.c new file mode 100644 index 000000000000..70187494b57a --- /dev/null +++ b/tools/testing/selftests/net/fin_ack_lat.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <netinet/in.h> +#include <netinet/tcp.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <unistd.h> + +static int child_pid; + +static unsigned long timediff(struct timeval s, struct timeval e) +{ + unsigned long s_us, e_us; + + s_us = s.tv_sec * 1000000 + s.tv_usec; + e_us = e.tv_sec * 1000000 + e.tv_usec; + if (s_us > e_us) + return 0; + return e_us - s_us; +} + +static void client(int port) +{ + int sock = 0; + struct sockaddr_in addr, laddr; + socklen_t len = sizeof(laddr); + struct linger sl; + int flag = 1; + int buffer; + struct timeval start, end; + unsigned long lat, sum_lat = 0, nr_lat = 0; + + while (1) { + gettimeofday(&start, NULL); + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock < 0) + error(-1, errno, "socket creation"); + + sl.l_onoff = 1; + sl.l_linger = 0; + if (setsockopt(sock, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl))) + error(-1, errno, 
"setsockopt(linger)"); + + if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, + &flag, sizeof(flag))) + error(-1, errno, "setsockopt(nodelay)"); + + addr.sin_family = AF_INET; + addr.sin_port = htons(port); + + if (inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr) <= 0) + error(-1, errno, "inet_pton"); + + if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) + error(-1, errno, "connect"); + + send(sock, &buffer, sizeof(buffer), 0); + if (read(sock, &buffer, sizeof(buffer)) == -1) + error(-1, errno, "waiting read"); + + gettimeofday(&end, NULL); + lat = timediff(start, end); + sum_lat += lat; + nr_lat++; + if (lat < 100000) + goto close; + + if (getsockname(sock, (struct sockaddr *)&laddr, &len) == -1) + error(-1, errno, "getsockname"); + printf("port: %d, lat: %lu, avg: %lu, nr: %lu\n", + ntohs(laddr.sin_port), lat, + sum_lat / nr_lat, nr_lat); +close: + fflush(stdout); + close(sock); + } +} + +static void server(int sock, struct sockaddr_in address) +{ + int accepted; + int addrlen = sizeof(address); + int buffer; + + while (1) { + accepted = accept(sock, (struct sockaddr *)&address, + (socklen_t *)&addrlen); + if (accepted < 0) + error(-1, errno, "accept"); + + if (read(accepted, &buffer, sizeof(buffer)) == -1) + error(-1, errno, "read"); + close(accepted); + } +} + +static void sig_handler(int signum) +{ + kill(SIGTERM, child_pid); + exit(0); +} + +int main(int argc, char const *argv[]) +{ + int sock; + int opt = 1; + struct sockaddr_in address; + struct sockaddr_in laddr; + socklen_t len = sizeof(laddr); + + if (signal(SIGTERM, sig_handler) == SIG_ERR) + error(-1, errno, "signal"); + + sock = socket(AF_INET, SOCK_STREAM, 0); + if (sock < 0) + error(-1, errno, "socket"); + + if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, + &opt, sizeof(opt)) == -1) + error(-1, errno, "setsockopt"); + + address.sin_family = AF_INET; + address.sin_addr.s_addr = INADDR_ANY; + /* dynamically allocate unused port */ + address.sin_port = 0; + + if (bind(sock, 
(struct sockaddr *)&address, sizeof(address)) < 0) + error(-1, errno, "bind"); + + if (listen(sock, 3) < 0) + error(-1, errno, "listen"); + + if (getsockname(sock, (struct sockaddr *)&laddr, &len) == -1) + error(-1, errno, "getsockname"); + + fprintf(stderr, "server port: %d\n", ntohs(laddr.sin_port)); + child_pid = fork(); + if (!child_pid) + client(ntohs(laddr.sin_port)); + else + server(sock, laddr); + + return 0; +} diff --git a/tools/testing/selftests/net/fin_ack_lat.sh b/tools/testing/selftests/net/fin_ack_lat.sh new file mode 100755 index 000000000000..a3ff6e0b2c7a --- /dev/null +++ b/tools/testing/selftests/net/fin_ack_lat.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test latency spikes caused by FIN/ACK handling race. + +set +x +set -e + +tmpfile=$(mktemp /tmp/fin_ack_latency.XXXX.log) + +cleanup() { + kill $(pidof fin_ack_lat) + rm -f $tmpfile +} + +trap cleanup EXIT + +do_test() { + RUNTIME=$1 + + ./fin_ack_lat | tee $tmpfile & + PID=$! + + sleep $RUNTIME + NR_SPIKES=$(wc -l $tmpfile | awk '{print $1}') + if [ $NR_SPIKES -gt 0 ] + then + echo "FAIL: $NR_SPIKES spikes detected" + return 1 + fi + return 0 +} + +do_test "30" +echo "test done" diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 8553a67a2322..40b076983239 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -4,19 +4,21 @@ ############################################################################## # Defines -DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ - | jq -r '.port | keys[]' | cut -d/ -f-2) -if [ -z "$DEVLINK_DEV" ]; then - echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" - exit 1 -fi -if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then - echo "SKIP: devlink device's bus is not PCI" - exit 1 -fi +if [[ ! 
-v DEVLINK_DEV ]]; then + DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + | jq -r '.port | keys[]' | cut -d/ -f-2) + if [ -z "$DEVLINK_DEV" ]; then + echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" + exit 1 + fi + if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then + echo "SKIP: devlink device's bus is not PCI" + exit 1 + fi -DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ - -n | cut -d" " -f3) + DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \ + -n | cut -d" " -f3) +fi ############################################################################## # Sanity checks @@ -27,6 +29,12 @@ if [ $? -ne 0 ]; then exit 1 fi +devlink help 2>&1 | grep trap &> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing devlink trap support" + exit 1 +fi + ############################################################################## # Devlink helpers @@ -190,3 +198,215 @@ devlink_tc_bind_pool_th_restore() devlink sb tc bind set $port tc $tc type $dir \ pool ${orig[0]} th ${orig[1]} } + +devlink_traps_num_get() +{ + devlink -j trap | jq '.[]["'$DEVLINK_DEV'"] | length' +} + +devlink_traps_get() +{ + devlink -j trap | jq -r '.[]["'$DEVLINK_DEV'"][].name' +} + +devlink_trap_type_get() +{ + local trap_name=$1; shift + + devlink -j trap show $DEVLINK_DEV trap $trap_name \ + | jq -r '.[][][].type' +} + +devlink_trap_action_set() +{ + local trap_name=$1; shift + local action=$1; shift + + # Pipe output to /dev/null to avoid expected warnings. 
+ devlink trap set $DEVLINK_DEV trap $trap_name \ + action $action &> /dev/null +} + +devlink_trap_action_get() +{ + local trap_name=$1; shift + + devlink -j trap show $DEVLINK_DEV trap $trap_name \ + | jq -r '.[][][].action' +} + +devlink_trap_group_get() +{ + devlink -j trap show $DEVLINK_DEV trap $trap_name \ + | jq -r '.[][][].group' +} + +devlink_trap_metadata_test() +{ + local trap_name=$1; shift + local metadata=$1; shift + + devlink -jv trap show $DEVLINK_DEV trap $trap_name \ + | jq -e '.[][][].metadata | contains(["'$metadata'"])' \ + &> /dev/null +} + +devlink_trap_rx_packets_get() +{ + local trap_name=$1; shift + + devlink -js trap show $DEVLINK_DEV trap $trap_name \ + | jq '.[][][]["stats"]["rx"]["packets"]' +} + +devlink_trap_rx_bytes_get() +{ + local trap_name=$1; shift + + devlink -js trap show $DEVLINK_DEV trap $trap_name \ + | jq '.[][][]["stats"]["rx"]["bytes"]' +} + +devlink_trap_stats_idle_test() +{ + local trap_name=$1; shift + local t0_packets t0_bytes + local t1_packets t1_bytes + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + t0_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + sleep 1 + + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + t1_bytes=$(devlink_trap_rx_bytes_get $trap_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + return 0 + else + return 1 + fi +} + +devlink_traps_enable_all() +{ + local trap_name + + for trap_name in $(devlink_traps_get); do + devlink_trap_action_set $trap_name "trap" + done +} + +devlink_traps_disable_all() +{ + for trap_name in $(devlink_traps_get); do + devlink_trap_action_set $trap_name "drop" + done +} + +devlink_trap_groups_get() +{ + devlink -j trap group | jq -r '.[]["'$DEVLINK_DEV'"][].name' +} + +devlink_trap_group_action_set() +{ + local group_name=$1; shift + local action=$1; shift + + # Pipe output to /dev/null to avoid expected warnings. 
+ devlink trap group set $DEVLINK_DEV group $group_name action $action \ + &> /dev/null +} + +devlink_trap_group_rx_packets_get() +{ + local group_name=$1; shift + + devlink -js trap group show $DEVLINK_DEV group $group_name \ + | jq '.[][][]["stats"]["rx"]["packets"]' +} + +devlink_trap_group_rx_bytes_get() +{ + local group_name=$1; shift + + devlink -js trap group show $DEVLINK_DEV group $group_name \ + | jq '.[][][]["stats"]["rx"]["bytes"]' +} + +devlink_trap_group_stats_idle_test() +{ + local group_name=$1; shift + local t0_packets t0_bytes + local t1_packets t1_bytes + + t0_packets=$(devlink_trap_group_rx_packets_get $group_name) + t0_bytes=$(devlink_trap_group_rx_bytes_get $group_name) + + sleep 1 + + t1_packets=$(devlink_trap_group_rx_packets_get $group_name) + t1_bytes=$(devlink_trap_group_rx_bytes_get $group_name) + + if [[ $t0_packets -eq $t1_packets && $t0_bytes -eq $t1_bytes ]]; then + return 0 + else + return 1 + fi +} + +devlink_trap_exception_test() +{ + local trap_name=$1; shift + local group_name=$1; shift + + devlink_trap_stats_idle_test $trap_name + check_fail $? "Trap stats idle when packets should have been trapped" + + devlink_trap_group_stats_idle_test $group_name + check_fail $? "Trap group idle when packets should have been trapped" +} + +devlink_trap_drop_test() +{ + local trap_name=$1; shift + local group_name=$1; shift + local dev=$1; shift + + # This is the common part of all the tests. It checks that stats are + # initially idle, then non-idle after changing the trap action and + # finally idle again. It also makes sure the packets are dropped and + # never forwarded. + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle with initial drop action" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle with initial drop action" + + + devlink_trap_action_set $trap_name "trap" + devlink_trap_stats_idle_test $trap_name + check_fail $? 
"Trap stats idle after setting action to trap" + devlink_trap_group_stats_idle_test $group_name + check_fail $? "Trap group stats idle after setting action to trap" + + devlink_trap_action_set $trap_name "drop" + + devlink_trap_stats_idle_test $trap_name + check_err $? "Trap stats not idle after setting action to drop" + devlink_trap_group_stats_idle_test $group_name + check_err $? "Trap group stats not idle after setting action to drop" + + tc_check_packets "dev $dev egress" 101 0 + check_err $? "Packets were not dropped" +} + +devlink_trap_drop_cleanup() +{ + local mz_pid=$1; shift + local dev=$1; shift + local proto=$1; shift + + kill $mz_pid && wait $mz_pid &> /dev/null + tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower +} diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh new file mode 100755 index 000000000000..eb8e2a23bbb4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool.sh @@ -0,0 +1,318 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + same_speeds_autoneg_off + different_speeds_autoneg_off + combination_of_neg_on_and_off + advertise_subset_of_speeds + check_highest_speed_is_chosen + different_speeds_autoneg_on +" +NUM_NETIFS=2 +source lib.sh +source ethtool_lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy +} + +different_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local with_mode=$1; shift + local adver=$1; shift + + local -a speeds_arr + + speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) + if [[ ${#speeds_arr[@]} < 2 ]]; then + check_err 1 "cannot check different speeds. 
There are not enough speeds" + fi + + echo ${speeds_arr[0]} ${speeds_arr[1]} +} + +same_speeds_autoneg_off() +{ + # Check that when each of the reported speeds is forced, the links come + # up and are operational. + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0)) + + for speed in "${speeds_arr[@]}"; do + RET=0 + ethtool_set $h1 speed $speed autoneg off + ethtool_set $h2 speed $speed autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "speed $speed autoneg off" + log_test "force of same speed autoneg off" + log_info "speed = $speed" + done + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +different_speeds_autoneg_off() +{ + # Test that when we force different speeds, links are not up and ping + # fails. + RET=0 + + local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $h1 speed $speed1 autoneg off + ethtool_set $h2 speed $speed2 autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_fail $? "ping with different speeds" + + log_test "force of different speeds autoneg off" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +combination_of_neg_on_and_off() +{ + # Test that when one device is forced to a speed supported by both + # endpoints and the other device is configured to autoneg on, the links + # are up and ping passes. + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) + + for speed in "${speeds_arr[@]}"; do + RET=0 + ethtool_set $h1 speed $speed autoneg off + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? 
"h1-speed=$speed autoneg off, h2 autoneg on" + log_test "one side with autoneg off and another with autoneg on" + log_info "force speed = $speed" + done + + ethtool -s $h1 autoneg on +} + +hex_speed_value_get() +{ + local speed=$1; shift + + local shift_size=${speed_values[$speed]} + speed=$((0x1 << $"shift_size")) + printf "%#x" "$speed" +} + +subset_of_common_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local adver=$1; shift + + local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver)) + local speed_to_advertise=0 + local speed_to_remove=${speeds_arr[0]} + speed_to_remove+='base' + + local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver)) + + for speed in ${speeds_mode_arr[@]}; do + if [[ $speed != $speed_to_remove* ]]; then + speed=$(hex_speed_value_get $speed) + speed_to_advertise=$(($speed_to_advertise | \ + $speed)) + fi + + done + + # Convert to hex. + printf "%#x" "$speed_to_advertise" +} + +speed_to_advertise_get() +{ + # The function returns the hex number that is composed by OR-ing all + # the modes corresponding to the provided speed. + local speed_without_mode=$1; shift + local supported_speeds=("$@"); shift + local speed_to_advertise=0 + + speed_without_mode+='base' + + for speed in ${supported_speeds[@]}; do + if [[ $speed == $speed_without_mode* ]]; then + speed=$(hex_speed_value_get $speed) + speed_to_advertise=$(($speed_to_advertise | \ + $speed)) + fi + + done + + # Convert to hex. + printf "%#x" "$speed_to_advertise" +} + +advertise_subset_of_speeds() +{ + # Test that when one device advertises a subset of speeds and another + # advertises a specific speed (but all modes of this speed), the links + # are up and ping passes. 
+ RET=0 + + local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) + ethtool_set $h1 advertise $speed_1_to_advertise + + if [ $RET != 0 ]; then + log_test "advertise subset of speeds" + return + fi + + local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1)) + # Check only speeds that h1 advertised. Remove the first speed. + unset speeds_arr_without_mode[0] + local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1)) + + for speed_value in ${speeds_arr_without_mode[@]}; do + RET=0 + local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \ + "${speeds_arr_with_mode[@]}") + ethtool_set $h2 advertise $speed_2_to_advertise + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)" + + log_test "advertise subset of speeds" + log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise" + done + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +check_highest_speed_is_chosen() +{ + # Test that when one device advertises a subset of speeds, the other + # chooses the highest speed. This test checks configuration without + # traffic. + RET=0 + + local max_speed + local chosen_speed + local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1) + + ethtool_set $h1 advertise $speed_to_advertise + + if [ $RET != 0 ]; then + log_test "check highest speed" + return + fi + + local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1)) + # Remove the first speed, h1 does not advertise this speed. + unset speeds_arr[0] + + max_speed=${speeds_arr[0]} + for current in ${speeds_arr[@]}; do + if [[ $current -gt $max_speed ]]; then + max_speed=$current + fi + done + + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + chosen_speed=$(ethtool $h1 | grep 'Speed:') + chosen_speed=${chosen_speed%"Mb/s"*} + chosen_speed=${chosen_speed#*"Speed: "} + ((chosen_speed == max_speed)) + check_err $? 
"h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed" + + log_test "check highest speed" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +different_speeds_autoneg_on() +{ + # Test that when we configure links to advertise different speeds, + # links are not up and ping fails. + RET=0 + + local -a speeds=($(different_speeds_get $h1 $h2 1 1)) + local speed1=${speeds[0]} + local speed2=${speeds[1]} + + speed1=$(hex_speed_value_get $speed1) + speed2=$(hex_speed_value_get $speed2) + + ethtool_set $h1 advertise $speed1 + ethtool_set $h2 advertise $speed2 + + if (($RET)); then + setup_wait_dev_with_timeout $h1 + setup_wait_dev_with_timeout $h2 + ping_do $h1 192.0.2.2 + check_fail $? "ping with different speeds autoneg on" + fi + + log_test "advertise different speeds autoneg on" + + ethtool -s $h2 autoneg on + ethtool -s $h1 autoneg on +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +declare -gA speed_values +eval "speed_values=($(speeds_arr_get))" + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh new file mode 100755 index 000000000000..925d229a59d8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +speeds_arr_get() +{ + cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \ + {sub(/,$/, "") \ + sub(/ETHTOOL_LINK_MODE_/,"") \ + sub(/_BIT/,"") \ + sub(/_Full/,"/Full") \ + sub(/_Half/,"/Half");\ + print "["$1"]="$3}' + + awk "${cmd}" /usr/include/linux/ethtool.h +} + +ethtool_set() +{ + local cmd="$@" + local out=$(ethtool -s $cmd 2>&1 | wc -l) + + check_err $out "error in configuration. 
$cmd" +} + +dev_speeds_get() +{ + local dev=$1; shift + local with_mode=$1; shift + local adver=$1; shift + local speeds_str + + if (($adver)); then + mode="Advertised link modes" + else + mode="Supported link modes" + fi + + speeds_str=$(ethtool "$dev" | \ + # Snip everything before the link modes section. + sed -n '/'"$mode"':/,$p' | \ + # Quit processing the rest at the start of the next section. + # When checking, skip the header of this section (hence the 2,). + sed -n '2,${/^[\t][^ \t]/q};p' | \ + # Drop the section header of the current section. + cut -d':' -f2) + + local -a speeds_arr=($speeds_str) + if [[ $with_mode -eq 0 ]]; then + for ((i=0; i<${#speeds_arr[@]}; i++)); do + speeds_arr[$i]=${speeds_arr[$i]%base*} + done + fi + echo ${speeds_arr[@]} +} + +common_speeds_get() +{ + dev1=$1; shift + dev2=$1; shift + with_mode=$1; shift + adver=$1; shift + + local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver)) + local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver)) + + comm -12 \ + <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \ + <(printf '%s\n' "${dev2_speeds[@]}" | sort -u) +} diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh new file mode 100644 index 000000000000..66496659bea7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh @@ -0,0 +1,873 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Various helpers and tests to verify FIB offload. + +__fib_trap_check() +{ + local ns=$1; shift + local family=$1; shift + local route=$1; shift + local should_fail=$1; shift + local ret + + ip -n $ns -j -p -$family route show $route \ + | jq -e '.[]["flags"] | contains(["trap"])' &> /dev/null + ret=$? 
+ if [[ $should_fail == "true" ]]; then + if [[ $ret -ne 0 ]]; then + return 0 + else + return 1 + fi + fi + + return $ret +} + +fib_trap_check() +{ + local ns=$1; shift + local family=$1; shift + local route=$1; shift + local should_fail=$1; shift + + busywait 5000 __fib_trap_check $ns $family "$route" $should_fail +} + +fib4_trap_check() +{ + local ns=$1; shift + local route=$1; shift + local should_fail=$1; shift + + fib_trap_check $ns 4 "$route" $should_fail +} + +fib6_trap_check() +{ + local ns=$1; shift + local route=$1; shift + local should_fail=$1; shift + + fib_trap_check $ns 6 "$route" $should_fail +} + +fib_ipv4_identical_routes_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 3); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 192.0.2.0/24 dev dummy2 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 tos 0 metric 1024" true + check_err $? "Appended route in hardware when should not" + + ip -n $ns route prepend 192.0.2.0/24 dev dummy3 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy3 tos 0 metric 1024" false + check_err $? "Prepended route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true + check_err $? "Route was not replaced in hardware by prepended one" + + log_test "IPv4 identical routes" + + for i in $(seq 1 3); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv4_tos_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false + check_err $? 
"Route not in hardware when should" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false + check_err $? "Highest TOS route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true + check_err $? "Lowest TOS route still in hardware when should not" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true + check_err $? "Middle TOS route in hardware when should not" + + log_test "IPv4 routes with TOS" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_metric_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1022 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1022" false + check_err $? "Lowest metric route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" true + check_err $? "Highest metric route still in hardware when should not" + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1023 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1023" true + check_err $? "Middle metric route in hardware when should not" + + log_test "IPv4 routes with metric" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replace_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? 
"Route not in hardware when should" + + ip -n $ns route replace 192.0.2.0/24 dev dummy2 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1024" false + check_err $? "Replacement route not in hardware when should" + + # Add a route with a higher metric and make sure that replacing it + # does not affect the lower metric one. + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1025 + ip -n $ns route replace 192.0.2.0/24 dev dummy2 metric 1025 + + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + fib4_trap_check $ns "192.0.2.0/24 dev dummy2 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv4 route replace" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv4_delete_test() +{ + local ns=$1; shift + local metric + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + # Insert multiple routes with the same prefix and length and varying + # metrics. Make sure that throughout delete operations the lowest + # metric route is the one in hardware. + for metric in $(seq 1024 1026); do + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric $metric + done + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route del 192.0.2.0/24 dev dummy1 metric 1024 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" false + check_err $? "Lowest metric route not in hardware when should" + + ip -n $ns route del 192.0.2.0/24 dev dummy1 metric 1026 + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" false + check_err $?
"Sole route not in hardware when should" + + log_test "IPv4 route delete" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_plen_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + # Add two routes with the same key and different prefix length and + # make sure both are in hardware. It can be verified that both are + # sharing the same leaf by checking the /proc/net/fib_trie + ip -n $ns route add 192.0.2.0/24 dev dummy1 + ip -n $ns route add 192.0.2.0/25 dev dummy1 + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1" false + check_err $? "/24 not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/25 dev dummy1" false + check_err $? "/25 not in hardware when should" + + log_test "IPv4 routes with different prefix length" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replay_metric_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1024 + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric 1025 + + devlink -N $ns dev reload $devlink_dev + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv4 routes replay - metric" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replay_tos_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0 + ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 + + devlink -N $ns dev reload $devlink_dev + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false + check_err $?
"Highest TOS route not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true + check_err $? "Lowest TOS route in hardware when should not" + + log_test "IPv4 routes replay - TOS" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_replay_plen_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 192.0.2.0/24 dev dummy1 + ip -n $ns route add 192.0.2.0/25 dev dummy1 + + devlink -N $ns dev reload $devlink_dev + + fib4_trap_check $ns "192.0.2.0/24 dev dummy1" false + check_err $? "/24 not in hardware when should" + + fib4_trap_check $ns "192.0.2.0/25 dev dummy1" false + check_err $? "/25 not in hardware when should" + + log_test "IPv4 routes replay - prefix length" + + ip -n $ns link del dev dummy1 +} + +fib_ipv4_flush_test() +{ + local ns=$1; shift + local metric + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + # Exercise the routes flushing code paths by inserting various + # prefix routes on a netdev and then deleting it. + for metric in $(seq 1 20); do + ip -n $ns route add 192.0.2.0/24 dev dummy1 metric $metric + done + + ip -n $ns link del dev dummy1 + + log_test "IPv4 routes flushing" +} + +fib_ipv6_add_test() +{ + local ns=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:1::/64 dev dummy2 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" true + check_err $? "Route in hardware when should not" + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? 
"Route not in hardware after appending route" + + log_test "IPv6 single route add" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_metric_test() +{ + local ns=$1; shift + + RET=0 + + ip -n $ns link add name dummy1 type dummy + ip -n $ns link set dev dummy1 up + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1022 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1022" false + check_err $? "Lowest metric route not in hardware when should" + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" true + check_err $? "Highest metric route still in hardware when should not" + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1023 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1023" true + check_err $? "Middle metric route in hardware when should not" + + log_test "IPv6 routes with metric" + + ip -n $ns link del dev dummy1 +} + +fib_ipv6_append_single_test() +{ + local ns=$1; shift + + # When an IPv6 multipath route is added without the 'nexthop' keyword, + # different code paths are taken compared to when the keyword is used. + # This test tries to verify the former. + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 via 2001:db8:1::2 metric 1024 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:10::/64 via 2001:db8:2::2 metric 1024 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? 
"Route not in hardware after appending" + + ip -n $ns route add 2001:db8:10::/64 via 2001:db8:1::2 metric 1025 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not" + + ip -n $ns route append 2001:db8:10::/64 via 2001:db8:2::2 metric 1025 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not after appending" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + + log_test "IPv6 append single route without 'nexthop' keyword" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replace_single_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route replace 2001:db8:1::/64 dev dummy2 metric 1024 + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" false + check_err $? "Replacement route not in hardware when should" + + # Add a route with a higher metric and make sure that replacing it + # does not affect the lower metric one. + ip -n $ns route add 2001:db8:1::/64 dev dummy1 metric 1025 + ip -n $ns route replace 2001:db8:1::/64 dev dummy2 metric 1025 + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2 metric 1025" true + check_err $?
"Highest metric route in hardware when should not" + + log_test "IPv6 single route replace" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_metric_multipath_test() +{ + local ns=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route add 2001:db8:10::/64 metric 1022 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1022" false + check_err $? "Lowest metric route not in hardware when should" + + ip -n $ns route add 2001:db8:10::/64 metric 1023 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" true + check_err $? "Highest metric route still in hardware when should not" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1023" true + check_err $? "Middle metric route in hardware when should not" + + log_test "IPv6 multipath routes with metric" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_append_multipath_test() +{ + local ns=$1; shift + + RET=0 + + for i in $(seq 1 3); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? 
"Route not in hardware when should" + + ip -n $ns route append 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:2::2 dev dummy2 \ + nexthop via 2001:db8:3::2 dev dummy3 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware after appending" + + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not" + + ip -n $ns route append 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:2::2 dev dummy2 \ + nexthop via 2001:db8:3::2 dev dummy3 + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Route in hardware when should not after appending" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + + log_test "IPv6 append multipath route with 'nexthop' keyword" + + for i in $(seq 1 3); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replace_multipath_test() +{ + local ns=$1; shift + local i + + RET=0 + + for i in $(seq 1 3); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route replace 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:3::2 dev dummy3 + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Replacement route not in hardware when should" + + # Add a route with a higher metric and make sure that replacing it + # does not affect the lower metric one.
+ ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route replace 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:3::2 dev dummy3 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Lowest metric route not in hardware when should" + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Highest metric route in hardware when should not" + + log_test "IPv6 multipath route replace" + + for i in $(seq 1 3); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_append_multipath_to_single_test() +{ + local ns=$1; shift + + # Test that when the first route in the leaf is not a multipath route + # and we try to append a multipath route with the same metric to it, it + # is not notified. + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware when should" + + ip -n $ns route append 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:2::2 dev dummy2 + fib6_trap_check $ns "2001:db8:10::/64 dev dummy2 metric 1024" true + check_err $? "Route in hardware when should not" + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware after append" + + log_test "IPv6 append multipath route to non-multipath route" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_delete_single_test() +{ + local ns=$1; shift + + # Test various deletion scenarios, where only a single route is + # deleted from the FIB node. 
+ for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + # Test deletion of a single route when it is the only route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + log_test "IPv6 delete sole single route" + + # Test that deletion of last route does not affect the first one. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025 + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1024" false + check_err $? "Route not in hardware after deleting higher metric route" + + log_test "IPv6 delete single route not in hardware" + + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + # Test that first route is replaced by next single route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete single route - replaced by single" + + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025 + + # Test that first route is replaced by next multipath route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1024 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" false + check_err $? 
"Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete single route - replaced by multipath" + + ip -n $ns route del 2001:db8:10::/64 metric 1025 + + # Test deletion of a single nexthop from a multipath route. + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "Route not in hardware after deleting a single nexthop" + + log_test "IPv6 delete single nexthop" + + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_delete_multipath_test() +{ + local ns=$1; shift + + # Test various deletion scenarios, where an entire multipath route is + # deleted from the FIB node. + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + # Test deletion of a multipath route when it is the only route in the + # FIB node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + log_test "IPv6 delete sole multipath route" + + # Test that deletion of last route does not affect the first one. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1025 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? 
"Route not in hardware after deleting higher metric route" + + log_test "IPv6 delete multipath route not in hardware" + + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + # Test that first route is replaced by next single route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 dev dummy1 metric 1025 + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 dev dummy1 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete multipath route - replaced by single" + + ip -n $ns route del 2001:db8:10::/64 dev dummy1 metric 1025 + + # Test that first route is replaced by next multipath route in the FIB + # node. + RET=0 + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route del 2001:db8:10::/64 metric 1024 + + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" false + check_err $? "Route not in hardware after deleting lowest metric route" + + log_test "IPv6 delete multipath route - replaced by multipath" + + ip -n $ns route del 2001:db8:10::/64 metric 1025 + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replay_single_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + done + + ip -n $ns route add 2001:db8:1::/64 dev dummy1 + ip -n $ns route append 2001:db8:1::/64 dev dummy2 + + devlink -N $ns dev reload $devlink_dev + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy1" false + check_err $? 
"First route not in hardware when should" + + fib6_trap_check $ns "2001:db8:1::/64 dev dummy2" true + check_err $? "Second route in hardware when should not" + + log_test "IPv6 routes replay - single route" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} + +fib_ipv6_replay_multipath_test() +{ + local ns=$1; shift + local devlink_dev=$1; shift + + RET=0 + + for i in $(seq 1 2); do + ip -n $ns link add name dummy$i type dummy + ip -n $ns link set dev dummy$i up + ip -n $ns address add 2001:db8:$i::1/64 dev dummy$i + done + + ip -n $ns route add 2001:db8:10::/64 metric 1024 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + ip -n $ns route add 2001:db8:10::/64 metric 1025 \ + nexthop via 2001:db8:1::2 dev dummy1 \ + nexthop via 2001:db8:2::2 dev dummy2 + + devlink -N $ns dev reload $devlink_dev + + fib6_trap_check $ns "2001:db8:10::/64 metric 1024" false + check_err $? "First route not in hardware when should" + + fib6_trap_check $ns "2001:db8:10::/64 metric 1025" true + check_err $? "Second route in hardware when should not" + + log_test "IPv6 routes replay - multipath route" + + for i in $(seq 1 2); do + ip -n $ns link del dev dummy$i + done +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 9385dc971269..2f5da414aaa7 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -18,6 +18,8 @@ NETIF_CREATE=${NETIF_CREATE:=yes} MCD=${MCD:=smcrouted} MC_CLI=${MC_CLI:=smcroutectl} PING_TIMEOUT=${PING_TIMEOUT:=5} +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -223,33 +225,119 @@ log_info() echo "INFO: $msg" } +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? 
+ if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + +until_counter_is() +{ + local value=$1; shift + local current=$("$@") + + echo $((current)) + ((current >= value)) +} + +busywait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + busywait "$timeout" until_counter_is $((base + delta)) "$@" +} + setup_wait_dev() { local dev=$1; shift + local wait_time=${1:-$WAIT_TIME}; shift + + setup_wait_dev_with_timeout "$dev" $INTERFACE_TIMEOUT $wait_time + + if (($?)); then + check_err 1 + log_test setup_wait_dev ": Interface $dev does not come up." + exit 1 + fi +} + +setup_wait_dev_with_timeout() +{ + local dev=$1; shift + local max_iterations=${1:-$WAIT_TIMEOUT}; shift + local wait_time=${1:-$WAIT_TIME}; shift + local i - while true; do + for ((i = 1; i <= $max_iterations; ++i)); do ip link show dev $dev up \ | grep 'state UP' &> /dev/null if [[ $? -ne 0 ]]; then sleep 1 else - break + sleep $wait_time + return 0 fi done + + return 1 } setup_wait() { local num_netifs=${1:-$NUM_NETIFS} + local i for ((i = 1; i <= num_netifs; ++i)); do - setup_wait_dev ${NETIFS[p$i]} + setup_wait_dev ${NETIFS[p$i]} 0 done # Make sure links are ready. sleep $WAIT_TIME } +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + output="$($cmd)" + # if the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ !
-z "$output" ] +} + lldpad_app_wait_set() { local dev=$1; shift @@ -505,9 +593,10 @@ tc_rule_stats_get() local dev=$1; shift local pref=$1; shift local dir=$1; shift + local selector=${1:-.packets}; shift tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ - | jq '.[1].options.actions[].stats.packets' + | jq ".[1].options.actions[].stats$selector" } ethtool_stats_get() @@ -518,6 +607,30 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + mac_get() { local if_name=$1 @@ -1018,3 +1131,21 @@ flood_test() flood_unicast_test $br_port $host1_if $host2_if flood_multicast_test $br_port $host1_if $host2_if } + +start_traffic() +{ + local h_in=$1; shift # Where the traffic egresses the host + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + + $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + -a own -b $dmac -t udp -q & + sleep 1 +} + +stop_traffic() +{ + # Suppress noise from killing mausezahn. + { kill %% && wait %%; } 2>/dev/null +} diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh index 6e4626ae71b0..8f4057310b5b 100755 --- a/tools/testing/selftests/net/forwarding/loopback.sh +++ b/tools/testing/selftests/net/forwarding/loopback.sh @@ -1,6 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 + ALL_TESTS="loopback_test" NUM_NETIFS=2 source tc_common.sh @@ -72,6 +75,11 @@ setup_prepare() h1_create h2_create + + if ethtool -k $h1 | grep loopback | grep -q fixed; then + log_test "SKIP: dev $h1 does not support loopback feature" + exit $ksft_skip + fi } cleanup() diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index a75cb51cc5bd..057f91b05098 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -1,9 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6" +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + sip_in_class_e + mc_mac_mismatch + ipv4_sip_equal_dip + ipv6_sip_equal_dip + ipv4_dip_link_local +" + NUM_NETIFS=4 source lib.sh +source tc_common.sh + +require_command $MCD +require_command $MC_CLI +table_name=selftests h1_create() { @@ -64,6 +78,8 @@ router_create() ip link set dev $rp1 up ip link set dev $rp2 up + tc qdisc add dev $rp2 clsact + ip address add 192.0.2.1/24 dev $rp1 ip address add 2001:db8:1::1/64 dev $rp1 @@ -79,10 +95,31 @@ router_destroy() ip address del 2001:db8:1::1/64 dev $rp1 ip address del 192.0.2.1/24 dev $rp1 + tc qdisc del dev $rp2 clsact + ip link set dev $rp2 down ip link set dev $rp1 down } +start_mcd() +{ + SMCROUTEDIR="$(mktemp -d)" + + for ((i = 1; i <= $NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + $SMCROUTEDIR/$table_name.conf + done + + $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ + -P $SMCROUTEDIR/$table_name.pid +} + +kill_mcd() +{ + pkill $MCD + rm -rf $SMCROUTEDIR +} + setup_prepare() { h1=${NETIFS[p1]} @@ -91,6 +128,10 @@ setup_prepare() rp2=${NETIFS[p3]} h2=${NETIFS[p4]} + rp1mac=$(mac_get $rp1) + + start_mcd + vrf_prepare h1_create @@ -113,6 +154,8 @@ cleanup() h1_destroy vrf_cleanup + + kill_mcd } ping_ipv4() @@ -125,6 +168,150 @@ ping_ipv6() ping6_test $h1 2001:db8:2::2 } +sip_in_class_e() +{ + RET=0 + + # Disable 
rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.$rp1.rp_filter 0 + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower src_ip 240.0.0.1 ip_proto udp action pass + + $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \ + -A 240.0.0.1 -b $rp1mac -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "Source IP in class E" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf.$rp1.rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + +create_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs +} + +delete_mcast_sg() +{ + local if_name=$1; shift + local s_addr=$1; shift + local mcast=$1; shift + local dest_ifs=${@} + + $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs +} + +__mc_mac_mismatch() +{ + local desc=$1; shift + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local flags=${1:-""}; shift + local dmac=01:02:03:04:05:06 + + RET=0 + + tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \ + flower dst_ip $dip action pass + + create_mcast_sg $rp1 $sip $dip $rp2 + + $MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b $dmac \ + -B $dip -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "Multicast MAC mismatch: $desc" + + delete_mcast_sg $rp1 $sip $dip $rp2 + tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower +} + +mc_mac_mismatch() +{ + __mc_mac_mismatch "IPv4" "ip" 192.0.2.2 225.1.2.3 + __mc_mac_mismatch "IPv6" "ipv6" 2001:db8:1::2 ff0e::3 "-6" +} + +ipv4_sip_equal_dip() +{ + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. 
+ sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf.$rp1.rp_filter 0 + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower src_ip 198.51.100.2 action pass + + $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \ + -A 198.51.100.2 -b $rp1mac -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "Source IP is equal to destination IP: IPv4" + + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf.$rp1.rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + +ipv6_sip_equal_dip() +{ + RET=0 + + tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \ + flower src_ip 2001:db8:2::2 action pass + + $MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \ + -A 2001:db8:2::2 -b $rp1mac -B 2001:db8:2::2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "Source IP is equal to destination IP: IPv6" + + tc filter del dev $rp2 egress protocol ipv6 pref 1 handle 101 flower +} + +ipv4_dip_link_local() +{ + local dip=169.254.1.1 + + RET=0 + + tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \ + flower dst_ip $dip action pass + + ip neigh add 169.254.1.1 lladdr 00:11:22:33:44:55 dev $rp2 + ip route add 169.254.1.0/24 dev $rp2 + + $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b $rp1mac -B $dip -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? 
"Packets were dropped" + + log_test "IPv4 destination IP is link-local" + + ip route del 169.254.1.0/24 dev $rp2 + ip neigh del 169.254.1.1 lladdr 00:11:22:33:44:55 dev $rp2 + tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh index fef88eb4b873..fa6a88c50750 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh @@ -36,7 +36,7 @@ h2_destroy() { ip -6 route del 2001:db8:1::/64 vrf v$h2 ip -4 route del 192.0.2.0/28 vrf v$h2 - simple_if_fini $h2 192.0.2.130/28 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 } router_create() diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh new file mode 100755 index 000000000000..40e0ad1bc4f2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# A driver for the ETS selftest that implements testing in slowpath. +lib_dir=. +source sch_ets_core.sh + +ALL_TESTS=" + ping_ipv4 + priomap_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr + classifier_mode + ets_test_strict + ets_test_mixed + ets_test_dwrr +" + +switch_create() +{ + ets_switch_create + + # Create a bottleneck so that the DWRR process can kick in. 
+ tc qdisc add dev $swp2 root handle 1: tbf \ + rate 1Gbit burst 1Mbit latency 100ms + PARENT="parent 1:" +} + +switch_destroy() +{ + ets_switch_destroy + tc qdisc del dev $swp2 root +} + +# Callback from sch_ets_tests.sh +get_stats() +{ + local stream=$1; shift + + link_stats_get $h2.1$stream rx bytes +} + +ets_run diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh new file mode 100644 index 000000000000..f906fcc66572 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -0,0 +1,300 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This is a template for ETS Qdisc test. +# +# This test sends from H1 several traffic streams with 802.1p-tagged packets. +# The tags are used at $swp1 to prioritize the traffic. Each stream is then +# queued at a different ETS band according to the assigned priority. After +# runnig for a while, counters at H2 are consulted to determine whether the +# traffic scheduling was according to the ETS configuration. +# +# This template is supposed to be embedded by a test driver, which implements +# statistics collection, any HW-specific stuff, and prominently configures the +# system to assure that there is overcommitment at $swp2. That is necessary so +# that the ETS traffic selection algorithm kicks in and has to schedule some +# traffic at the expense of other. +# +# A driver for veth-based testing is in sch_ets.sh, an example of a driver for +# an offloaded data path is in selftests/drivers/net/mlxsw/sch_ets.sh. 
+# +# +---------------------------------------------------------------------+ +# | H1 | +# | + $h1.10 + $h1.11 + $h1.12 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | 192.0.2.33/28 | +# | | egress-qos-map | egress-qos-map | egress-qos-map | +# | | 0:0 | 0:1 | 0:2 | +# | \____________________ | ____________________/ | +# | \|/ | +# | + $h1 | +# +---------------------------|-----------------------------------------+ +# | +# +---------------------------|-----------------------------------------+ +# | SW + $swp1 | +# | | >1Gbps | +# | ____________________/|\____________________ | +# | / | \ | +# | +--|----------------+ +--|----------------+ +--|----------------+ | +# | | + $swp1.10 | | + $swp1.11 | | + $swp1.12 | | +# | | ingress-qos-map| | ingress-qos-map| | ingress-qos-map| | +# | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | | +# | | | | | | | | +# | | BR10 | | BR11 | | BR12 | | +# | | | | | | | | +# | | + $swp2.10 | | + $swp2.11 | | + $swp2.12 | | +# | +--|----------------+ +--|----------------+ +--|----------------+ | +# | \____________________ | ____________________/ | +# | \|/ | +# | + $swp2 | +# | | 1Gbps (ethtool or HTB qdisc) | +# | | qdisc ets quanta $W0 $W1 $W2 | +# | | priomap 0 1 2 | +# +---------------------------|-----------------------------------------+ +# | +# +---------------------------|-----------------------------------------+ +# | H2 + $h2 | +# | ____________________/|\____________________ | +# | / | \ | +# | + $h2.10 + $h2.11 + $h2.12 | +# | 192.0.2.2/28 192.0.2.18/28 192.0.2.34/28 | +# +---------------------------------------------------------------------+ + +NUM_NETIFS=4 +CHECK_TC=yes +source $lib_dir/lib.sh +source $lib_dir/sch_ets_tests.sh + +PARENT=root +QDISC_DEV= + +sip() +{ + echo 192.0.2.$((16 * $1 + 1)) +} + +dip() +{ + echo 192.0.2.$((16 * $1 + 2)) +} + +# Callback from sch_ets_tests.sh +ets_start_traffic() +{ + local dst_mac=$(mac_get $h2) + local i=$1; shift + + start_traffic $h1.1$i $(sip $i) $(dip $i) $dst_mac +} + +ETS_CHANGE_QDISC= 
+ +priomap_mode() +{ + echo "Running in priomap mode" + ets_delete_qdisc + ETS_CHANGE_QDISC=ets_change_qdisc_priomap +} + +classifier_mode() +{ + echo "Running in classifier mode" + ets_delete_qdisc + ETS_CHANGE_QDISC=ets_change_qdisc_classifier +} + +ets_change_qdisc_priomap() +{ + local dev=$1; shift + local nstrict=$1; shift + local priomap=$1; shift + local quanta=("${@}") + + local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi) + + tc qdisc $op dev $dev $PARENT handle 10: ets \ + $(if ((nstrict)); then echo strict $nstrict; fi) \ + $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi) \ + priomap $priomap + QDISC_DEV=$dev +} + +ets_change_qdisc_classifier() +{ + local dev=$1; shift + local nstrict=$1; shift + local priomap=$1; shift + local quanta=("${@}") + + local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi) + + tc qdisc $op dev $dev $PARENT handle 10: ets \ + $(if ((nstrict)); then echo strict $nstrict; fi) \ + $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi) + + if [[ $op == add ]]; then + local prio=0 + local band + + for band in $priomap; do + tc filter add dev $dev parent 10: basic \ + match "meta(priority eq $prio)" \ + flowid 10:$((band + 1)) + ((prio++)) + done + fi + QDISC_DEV=$dev +} + +# Callback from sch_ets_tests.sh +ets_change_qdisc() +{ + if [[ -z "$ETS_CHANGE_QDISC" ]]; then + exit 1 + fi + $ETS_CHANGE_QDISC "$@" +} + +ets_delete_qdisc() +{ + if [[ -n $QDISC_DEV ]]; then + tc qdisc del dev $QDISC_DEV $PARENT + QDISC_DEV= + fi +} + +h1_create() +{ + local i; + + simple_if_init $h1 + mtu_set $h1 9900 + for i in {0..2}; do + vlan_create $h1 1$i v$h1 $(sip $i)/28 + ip link set dev $h1.1$i type vlan egress 0:$i + done +} + +h1_destroy() +{ + local i + + for i in {0..2}; do + vlan_destroy $h1 1$i + done + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + local i + + simple_if_init $h2 + mtu_set $h2 9900 + for i in {0..2}; do + vlan_create $h2 1$i v$h2 $(dip $i)/28 + done +} + 
+h2_destroy() +{ + local i + + for i in {0..2}; do + vlan_destroy $h2 1$i + done + mtu_restore $h2 + simple_if_fini $h2 +} + +ets_switch_create() +{ + local i + + ip link set dev $swp1 up + mtu_set $swp1 9900 + + ip link set dev $swp2 up + mtu_set $swp2 9900 + + for i in {0..2}; do + vlan_create $swp1 1$i + ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2 + + vlan_create $swp2 1$i + + ip link add dev br1$i type bridge + ip link set dev $swp1.1$i master br1$i + ip link set dev $swp2.1$i master br1$i + + ip link set dev br1$i up + ip link set dev $swp1.1$i up + ip link set dev $swp2.1$i up + done +} + +ets_switch_destroy() +{ + local i + + ets_delete_qdisc + + for i in {0..2}; do + ip link del dev br1$i + vlan_destroy $swp2 1$i + vlan_destroy $swp1 1$i + done + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + put=$swp2 + hut=$h2 + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(dip 0) " vlan 10" + ping_test $h1.11 $(dip 1) " vlan 11" + ping_test $h1.12 $(dip 2) " vlan 12" +} + +ets_run() +{ + trap cleanup EXIT + + setup_prepare + setup_wait + + tests_run + + exit $EXIT_STATUS +} diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh new file mode 100644 index 000000000000..3c3b204d47e8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -0,0 +1,227 @@ +# SPDX-License-Identifier: GPL-2.0 + +# Global interface: +# $put -- port under test (e.g. $swp2) +# get_stats($band) -- A function to collect stats for band +# ets_start_traffic($band) -- Start traffic for this band +# ets_change_qdisc($op, $dev, $nstrict, $quanta...) 
-- Add or change qdisc + +# WS describes the Qdisc configuration. It has one value per band (so the +# number of array elements indicates the number of bands). If the value is +# 0, it is a strict band, otherwise the it's a DRR band and the value is +# that band's quantum. +declare -a WS + +qdisc_describe() +{ + local nbands=${#WS[@]} + local nstrict=0 + local i + + for ((i = 0; i < nbands; i++)); do + if ((!${WS[$i]})); then + : $((nstrict++)) + fi + done + + echo -n "ets bands $nbands" + if ((nstrict)); then + echo -n " strict $nstrict" + fi + if ((nstrict < nbands)); then + echo -n " quanta" + for ((i = nstrict; i < nbands; i++)); do + echo -n " ${WS[$i]}" + done + fi +} + +__strict_eval() +{ + local desc=$1; shift + local d=$1; shift + local total=$1; shift + local above=$1; shift + + RET=0 + + if ((! total)); then + check_err 1 "No traffic observed" + log_test "$desc" + return + fi + + local ratio=$(echo "scale=2; 100 * $d / $total" | bc -l) + if ((above)); then + test $(echo "$ratio > 95.0" | bc -l) -eq 1 + check_err $? "Not enough traffic" + log_test "$desc" + log_info "Expected ratio >95% Measured ratio $ratio" + else + test $(echo "$ratio < 5" | bc -l) -eq 1 + check_err $? 
"Too much traffic" + log_test "$desc" + log_info "Expected ratio <5% Measured ratio $ratio" + fi +} + +strict_eval() +{ + __strict_eval "$@" 1 +} + +notraf_eval() +{ + __strict_eval "$@" 0 +} + +__ets_dwrr_test() +{ + local -a streams=("$@") + + local low_stream=${streams[0]} + local seen_strict=0 + local -a t0 t1 d + local stream + local total + local i + + echo "Testing $(qdisc_describe), streams ${streams[@]}" + + for stream in ${streams[@]}; do + ets_start_traffic $stream + done + + sleep 10 + + t0=($(for stream in ${streams[@]}; do + get_stats $stream + done)) + + sleep 10 + + t1=($(for stream in ${streams[@]}; do + get_stats $stream + done)) + d=($(for ((i = 0; i < ${#streams[@]}; i++)); do + echo $((${t1[$i]} - ${t0[$i]})) + done)) + total=$(echo ${d[@]} | sed 's/ /+/g' | bc) + + for ((i = 0; i < ${#streams[@]}; i++)); do + local stream=${streams[$i]} + if ((seen_strict)); then + notraf_eval "band $stream" ${d[$i]} $total + elif ((${WS[$stream]} == 0)); then + strict_eval "band $stream" ${d[$i]} $total + seen_strict=1 + elif ((stream == low_stream)); then + # Low stream is used as DWRR evaluation reference. 
+ continue + else + multipath_eval "bands $low_stream:$stream" \ + ${WS[$low_stream]} ${WS[$stream]} \ + ${d[0]} ${d[$i]} + fi + done + + for stream in ${streams[@]}; do + stop_traffic + done +} + +ets_dwrr_test_012() +{ + __ets_dwrr_test 0 1 2 +} + +ets_dwrr_test_01() +{ + __ets_dwrr_test 0 1 +} + +ets_dwrr_test_12() +{ + __ets_dwrr_test 1 2 +} + +ets_qdisc_setup() +{ + local dev=$1; shift + local nstrict=$1; shift + local -a quanta=("$@") + + local ndwrr=${#quanta[@]} + local nbands=$((nstrict + ndwrr)) + local nstreams=$(if ((nbands > 3)); then echo 3; else echo $nbands; fi) + local priomap=$(seq 0 $((nstreams - 1))) + local i + + WS=($( + for ((i = 0; i < nstrict; i++)); do + echo 0 + done + for ((i = 0; i < ndwrr; i++)); do + echo ${quanta[$i]} + done + )) + + ets_change_qdisc $dev $nstrict "$priomap" ${quanta[@]} +} + +ets_set_dwrr_uniform() +{ + ets_qdisc_setup $put 0 3300 3300 3300 +} + +ets_set_dwrr_varying() +{ + ets_qdisc_setup $put 0 5000 3500 1500 +} + +ets_set_strict() +{ + ets_qdisc_setup $put 3 +} + +ets_set_mixed() +{ + ets_qdisc_setup $put 1 5000 2500 1500 +} + +ets_change_quantum() +{ + tc class change dev $put classid 10:2 ets quantum 8000 + WS[1]=8000 +} + +ets_set_dwrr_two_bands() +{ + ets_qdisc_setup $put 0 5000 2500 +} + +ets_test_strict() +{ + ets_set_strict + ets_dwrr_test_01 + ets_dwrr_test_12 +} + +ets_test_mixed() +{ + ets_set_mixed + ets_dwrr_test_01 + ets_dwrr_test_12 +} + +ets_test_dwrr() +{ + ets_set_dwrr_uniform + ets_dwrr_test_012 + ets_set_dwrr_varying + ets_dwrr_test_012 + ets_change_quantum + ets_dwrr_test_012 + ets_set_dwrr_two_bands + ets_dwrr_test_01 +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh new file mode 100644 index 000000000000..d1f26cb7cd73 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -0,0 +1,233 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends a stream of traffic from H1 through a switch, to 
H2. On the +# egress port from the switch ($swp2), a shaper is installed. The test verifies +# that the rates on the port match the configured shaper. +# +# In order to test per-class shaping, $swp2 actually contains TBF under PRIO or +# ETS, with two different configurations. Traffic is prioritized using 802.1p. +# +# +-------------------------------------------+ +# | H1 | +# | + $h1.10 $h1.11 + | +# | | 192.0.2.1/28 192.0.2.17/28 | | +# | | | | +# | \______________ _____________/ | +# | \ / | +# | + $h1 | +# +---------------------|---------------------+ +# | +# +---------------------|---------------------+ +# | SW + $swp1 | +# | _______________/ \_______________ | +# | / \ | +# | +-|--------------+ +--------------|-+ | +# | | + $swp1.10 | | $swp1.11 + | | +# | | | | | | +# | | BR10 | | BR11 | | +# | | | | | | +# | | + $swp2.10 | | $swp2.11 + | | +# | +-|--------------+ +--------------|-+ | +# | \_______________ ______________/ | +# | \ / | +# | + $swp2 | +# +---------------------|---------------------+ +# | +# +---------------------|---------------------+ +# | H2 + $h2 | +# | ______________/ \______________ | +# | / \ | +# | | | | +# | + $h2.10 $h2.11 + | +# | 192.0.2.2/28 192.0.2.18/28 | +# +-------------------------------------------+ + +NUM_NETIFS=4 +CHECK_TC="yes" +source $lib_dir/lib.sh + +ipaddr() +{ + local host=$1; shift + local vlan=$1; shift + + echo 192.0.2.$((16 * (vlan - 10) + host)) +} + +host_create() +{ + local dev=$1; shift + local host=$1; shift + + simple_if_init $dev + mtu_set $dev 10000 + + vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + ip link set dev $dev.10 type vlan egress 0:0 + + vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + ip link set dev $dev.11 type vlan egress 0:1 +} + +host_destroy() +{ + local dev=$1; shift + + vlan_destroy $dev 11 + vlan_destroy $dev 10 + mtu_restore $dev + simple_if_fini $dev +} + +h1_create() +{ + host_create $h1 1 +} + +h1_destroy() +{ + host_destroy $h1 +} + +h2_create() +{ + host_create $h2 2 + + tc 
qdisc add dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 prot 802.1q \ + flower $TCFLAGS vlan_id 10 action pass + tc filter add dev $h2 ingress pref 1011 prot 802.1q \ + flower $TCFLAGS vlan_id 11 action pass +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + host_destroy $h2 +} + +switch_create() +{ + local intf + local vlan + + ip link add dev br10 type bridge + ip link add dev br11 type bridge + + for intf in $swp1 $swp2; do + ip link set dev $intf up + mtu_set $intf 10000 + + for vlan in 10 11; do + vlan_create $intf $vlan + ip link set dev $intf.$vlan master br$vlan + ip link set dev $intf.$vlan up + done + done + + for vlan in 10 11; do + ip link set dev $swp1.$vlan type vlan ingress 0:0 1:1 + done + + ip link set dev br10 up + ip link set dev br11 up +} + +switch_destroy() +{ + local intf + local vlan + + # A test may have been interrupted mid-run, with Qdisc installed. Delete + # it here. + tc qdisc del dev $swp2 root 2>/dev/null + + ip link set dev br11 down + ip link set dev br10 down + + for intf in $swp2 $swp1; do + for vlan in 11 10; do + ip link set dev $intf.$vlan down + ip link set dev $intf.$vlan nomaster + vlan_destroy $intf $vlan + done + + mtu_restore $intf + ip link set dev $intf down + done + + ip link del dev br11 + ip link del dev br10 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + swp4=${NETIFS[p7]} + swp5=${NETIFS[p8]} + + h2_mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1.10 $(ipaddr 2 10) " vlan 10" + ping_test $h1.11 $(ipaddr 2 11) " vlan 11" +} + +tbf_get_counter() +{ + local vlan=$1; shift + + tc_rule_stats_get $h2 10$vlan ingress .bytes +} + +do_tbf_test() +{ + local vlan=$1; shift + local mbit=$1; shift + + start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 
$vlan) $h2_mac + sleep 5 # Wait for the burst to dwindle + + local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) + sleep 10 + local t3=$(tbf_get_counter $vlan) + stop_traffic + + RET=0 + + # Note: TBF uses 10^6 Mbits, not 2^20 ones. + local er=$((mbit * 1000 * 1000)) + local nr=$(rate $t2 $t3 10) + local nr_pct=$((100 * (nr - er) / er)) + ((-5 <= nr_pct && nr_pct <= 5)) + check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%." + + log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh new file mode 100755 index 000000000000..84fb6cab88e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_ets.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC="ets strict" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_etsprio.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh new file mode 100644 index 000000000000..8bd85da1905a --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -0,0 +1,39 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + tbf_test +" +source $lib_dir/sch_tbf_core.sh + +tbf_test_one() +{ + local bs=$1; shift + + tc qdisc replace dev $swp2 parent 10:3 handle 103: tbf \ + rate 400Mbit burst $bs limit 1M + tc qdisc replace dev $swp2 parent 10:2 handle 102: tbf \ + rate 800Mbit burst $bs limit 1M + + do_tbf_test 10 400 $bs + do_tbf_test 11 800 $bs +} + +tbf_test() +{ + # This test is used for both ETS and PRIO. Even though we only need two + # bands, PRIO demands a minimum of three. 
+ tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + tbf_test_one 128K + tc qdisc del dev $swp2 root +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh new file mode 100755 index 000000000000..9c8cb1cb9ba4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_prio.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +QDISC="prio bands" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_etsprio.sh diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh new file mode 100755 index 000000000000..72aa21ba88c7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -0,0 +1,33 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + ping_ipv4 + tbf_test +" +: ${lib_dir:=.} +source $lib_dir/sch_tbf_core.sh + +tbf_test_one() +{ + local bs=$1; shift + + tc qdisc replace dev $swp2 root handle 108: tbf \ + rate 400Mbit burst $bs limit 1M + do_tbf_test 10 400 $bs +} + +tbf_test() +{ + tbf_test_one 128K + tc qdisc del dev $swp2 root +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 9d3b64a2a264..64f652633585 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -3,23 +3,48 @@ CHECK_TC="yes" +# Can be overridden by the configuration file. 
See lib.sh +TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms + +__tc_check_packets() +{ + local id=$1 + local handle=$2 + local count=$3 + local operator=$4 + + start_time="$(date -u +%s%3N)" + while true + do + cmd_jq "tc -j -s filter show $id" \ + ".[] | select(.options.handle == $handle) | \ + select(.options.actions[0].stats.packets $operator $count)" \ + &> /dev/null + ret=$? + if [[ $ret -eq 0 ]]; then + return $ret + fi + current_time="$(date -u +%s%3N)" + diff=$(expr $current_time - $start_time) + if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then + return 1 + fi + done +} + tc_check_packets() { local id=$1 local handle=$2 local count=$3 - local ret - - output="$(tc -j -s filter show $id)" - # workaround the jq bug which causes jq to return 0 in case input is "" - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output | \ - jq -e ".[] \ - | select(.options.handle == $handle) \ - | select(.options.actions[0].stats.packets == $count)" \ - &> /dev/null - return $? + + __tc_check_packets "$id" "$handle" "$count" "==" +} + +tc_check_packets_hitting() +{ + local id=$1 + local handle=$2 + + __tc_check_packets "$id" "$handle" 0 ">" } diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh new file mode 100755 index 000000000000..5782433886fc --- /dev/null +++ b/tools/testing/selftests/net/l2tp.sh @@ -0,0 +1,382 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# L2TPv3 tunnel between 2 hosts +# +# host-1 | router | host-2 +# | | +# lo l2tp | | l2tp lo +# 172.16.101.1 172.16.1.1 | | 172.16.1.2 172.16.101.2 +# fc00:101::1 fc00:1::1 | | fc00:1::2 fc00:101::2 +# | | +# eth0 | | eth0 +# 10.1.1.1 | | 10.1.2.1 +# 2001:db8:1::1 | | 2001:db8:2::1 + +VERBOSE=0 +PAUSE_ON_FAIL=no + +which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) + +################################################################################ +# +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} 
]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd() +{ + local ns + local cmd + local out + local rc + + ns="$1" + shift + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + printf " COMMAND: $cmd\n" + fi + + out=$(eval ip netns exec ${ns} ${cmd} 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# create namespaces and interconnects + +create_ns() +{ + local ns=$1 + local addr=$2 + local addr6=$3 + + [ -z "${addr}" ] && addr="-" + [ -z "${addr6}" ] && addr6="-" + + ip netns add ${ns} + + ip -netns ${ns} link set lo up + if [ "${addr}" != "-" ]; then + ip -netns ${ns} addr add dev lo ${addr} + fi + if [ "${addr6}" != "-" ]; then + ip -netns ${ns} -6 addr add dev lo ${addr6} + fi + + ip -netns ${ns} ro add unreachable default metric 8192 + ip -netns ${ns} -6 ro add unreachable default metric 8192 + + ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +# create veth pair to connect namespaces and apply addresses. 
+connect_ns() +{ + local ns1=$1 + local ns1_dev=$2 + local ns1_addr=$3 + local ns1_addr6=$4 + local ns2=$5 + local ns2_dev=$6 + local ns2_addr=$7 + local ns2_addr6=$8 + + ip -netns ${ns1} li add ${ns1_dev} type veth peer name tmp + ip -netns ${ns1} li set ${ns1_dev} up + ip -netns ${ns1} li set tmp netns ${ns2} name ${ns2_dev} + ip -netns ${ns2} li set ${ns2_dev} up + + if [ "${ns1_addr}" != "-" ]; then + ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr} + ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr} + fi + + if [ "${ns1_addr6}" != "-" ]; then + ip -netns ${ns1} addr add dev ${ns1_dev} ${ns1_addr6} + ip -netns ${ns2} addr add dev ${ns2_dev} ${ns2_addr6} + fi +} + +################################################################################ +# test setup + +cleanup() +{ + local ns + + for ns in host-1 host-2 router + do + ip netns del ${ns} 2>/dev/null + done +} + +setup_l2tp_ipv4() +{ + # + # configure l2tpv3 tunnel on host-1 + # + ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \ + encap ip local 10.1.1.1 remote 10.1.2.1 + ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \ + session_id 1041 peer_session_id 1042 + ip -netns host-1 link set dev l2tp4 up + ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2 + + # + # configure l2tpv3 tunnel on host-2 + # + ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \ + encap ip local 10.1.2.1 remote 10.1.1.1 + ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \ + session_id 1042 peer_session_id 1041 + ip -netns host-2 link set dev l2tp4 up + ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1 + + # + # add routes to loopback addresses + # + ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2 + ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1 +} + +setup_l2tp_ipv6() +{ + # + # configure l2tpv3 tunnel on host-1 + # + ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \ + encap ip local 2001:db8:1::1 remote 
2001:db8:2::1 + ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \ + session_id 1061 peer_session_id 1062 + ip -netns host-1 link set dev l2tp6 up + ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2 + + # + # configure l2tpv3 tunnel on host-2 + # + ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \ + encap ip local 2001:db8:2::1 remote 2001:db8:1::1 + ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \ + session_id 1062 peer_session_id 1061 + ip -netns host-2 link set dev l2tp6 up + ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1 + + # + # add routes to loopback addresses + # + ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2 + ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1 +} + +setup() +{ + # start clean + cleanup + + set -e + create_ns host-1 172.16.101.1/32 fc00:101::1/128 + create_ns host-2 172.16.101.2/32 fc00:101::2/128 + create_ns router + + connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \ + router eth1 10.1.1.2/24 2001:db8:1::2/64 + + connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \ + router eth2 10.1.2.2/24 2001:db8:2::2/64 + + ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2 + ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2 + + ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2 + ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2 + + setup_l2tp_ipv4 + setup_l2tp_ipv6 + set +e +} + +setup_ipsec() +{ + # + # IPv4 + # + run_cmd host-1 ip xfrm policy add \ + src 10.1.1.1 dst 10.1.2.1 dir out \ + tmpl proto esp mode transport + + run_cmd host-1 ip xfrm policy add \ + src 10.1.2.1 dst 10.1.1.1 dir in \ + tmpl proto esp mode transport + + run_cmd host-2 ip xfrm policy add \ + src 10.1.1.1 dst 10.1.2.1 dir in \ + tmpl proto esp mode transport + + run_cmd host-2 ip xfrm policy add \ + src 10.1.2.1 dst 10.1.1.1 dir out \ + tmpl proto esp mode transport + + ip -netns host-1 xfrm state add \ + src 10.1.1.1 dst 10.1.2.1 \ + spi 0x1000 proto esp aead 
'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + ip -netns host-1 xfrm state add \ + src 10.1.2.1 dst 10.1.1.1 \ + spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + ip -netns host-2 xfrm state add \ + src 10.1.1.1 dst 10.1.2.1 \ + spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + ip -netns host-2 xfrm state add \ + src 10.1.2.1 dst 10.1.1.1 \ + spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + # + # IPV6 + # + run_cmd host-1 ip -6 xfrm policy add \ + src 2001:db8:1::1 dst 2001:db8:2::1 dir out \ + tmpl proto esp mode transport + + run_cmd host-1 ip -6 xfrm policy add \ + src 2001:db8:2::1 dst 2001:db8:1::1 dir in \ + tmpl proto esp mode transport + + run_cmd host-2 ip -6 xfrm policy add \ + src 2001:db8:1::1 dst 2001:db8:2::1 dir in \ + tmpl proto esp mode transport + + run_cmd host-2 ip -6 xfrm policy add \ + src 2001:db8:2::1 dst 2001:db8:1::1 dir out \ + tmpl proto esp mode transport + + ip -netns host-1 -6 xfrm state add \ + src 2001:db8:1::1 dst 2001:db8:2::1 \ + spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + ip -netns host-1 -6 xfrm state add \ + src 2001:db8:2::1 dst 2001:db8:1::1 \ + spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + ip -netns host-2 -6 xfrm state add \ + src 2001:db8:1::1 dst 2001:db8:2::1 \ + spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport + + ip -netns host-2 -6 xfrm state add \ + src 2001:db8:2::1 dst 2001:db8:1::1 \ + spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \ + 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport +} + +teardown_ipsec() +{ + run_cmd host-1 ip xfrm state flush + run_cmd host-1 
ip xfrm policy flush + run_cmd host-2 ip xfrm state flush + run_cmd host-2 ip xfrm policy flush +} + +################################################################################ +# generate traffic through tunnel for various cases + +run_ping() +{ + local desc="$1" + + run_cmd host-1 ping -c1 -w1 172.16.1.2 + log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" + + run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" + + run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + log_test $? 0 "IPv6 basic L2TP tunnel ${desc}" + + run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + log_test $? 0 "IPv6 route through L2TP tunnel ${desc}" +} + +run_tests() +{ + local desc + + setup + run_ping + + setup_ipsec + run_ping "- with IPsec" + run_cmd host-1 ping -c1 -w1 172.16.1.2 + log_test $? 0 "IPv4 basic L2TP tunnel ${desc}" + + run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2 + log_test $? 0 "IPv4 route through L2TP tunnel ${desc}" + + run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2 + log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec" + + run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2 + log_test $? 
0 "IPv6 route through L2TP tunnel - with IPsec" + + teardown_ipsec + run_ping "- after IPsec teardown" +} + +################################################################################ +# main + +declare -i nfail=0 +declare -i nsuccess=0 + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + *) exit 1;; + esac +done + +run_tests +cleanup + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore new file mode 100644 index 000000000000..d72f07642738 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -0,0 +1,2 @@ +mptcp_connect +*.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile new file mode 100644 index 000000000000..93de52016dde --- /dev/null +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. 
+ +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g + +TEST_PROGS := mptcp_connect.sh + +TEST_GEN_FILES = mptcp_connect + +EXTRA_CLEAN := *.pcap + +include ../../lib.mk diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config new file mode 100644 index 000000000000..2499824d9e1c --- /dev/null +++ b/tools/testing/selftests/net/mptcp/config @@ -0,0 +1,4 @@ +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y +CONFIG_VETH=y +CONFIG_NET_SCH_NETEM=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c new file mode 100644 index 000000000000..99579c0223c1 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -0,0 +1,841 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <errno.h> +#include <limits.h> +#include <fcntl.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <strings.h> +#include <unistd.h> + +#include <sys/poll.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/mman.h> + +#include <netdb.h> +#include <netinet/in.h> + +#include <linux/tcp.h> + +extern int optind; + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif +#ifndef TCP_ULP +#define TCP_ULP 31 +#endif + +static bool listen_mode; +static int poll_timeout; + +enum cfg_mode { + CFG_MODE_POLL, + CFG_MODE_MMAP, + CFG_MODE_SENDFILE, +}; + +static enum cfg_mode cfg_mode = CFG_MODE_POLL; +static const char *cfg_host; +static const char *cfg_port = "12000"; +static int cfg_sock_proto = IPPROTO_MPTCP; +static bool tcpulp_audit; +static int pf = AF_INET; +static int cfg_sndbuf; + +static void die_usage(void) +{ + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]" + "[ -l ] [ -t timeout ] connect_address\n"); + exit(1); +} + +static const char *getxinfo_strerr(int err) +{ + if (err == EAI_SYSTEM) + return strerror(errno); + + 
return gai_strerror(err); +} + +static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, + char *host, socklen_t hostlen, + char *serv, socklen_t servlen) +{ + int flags = NI_NUMERICHOST | NI_NUMERICSERV; + int err = getnameinfo(addr, addrlen, host, hostlen, serv, servlen, + flags); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getnameinfo: %s\n", errstr); + exit(1); + } +} + +static void xgetaddrinfo(const char *node, const char *service, + const struct addrinfo *hints, + struct addrinfo **res) +{ + int err = getaddrinfo(node, service, hints, res); + + if (err) { + const char *errstr = getxinfo_strerr(err); + + fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", + node ? node : "", service ? service : "", errstr); + exit(1); + } +} + +static void set_sndbuf(int fd, unsigned int size) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &size, sizeof(size)); + if (err) { + perror("set SO_SNDBUF"); + exit(1); + } +} + +static int sock_listen_mptcp(const char * const listenaddr, + const char * const port) +{ + int sock; + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + .ai_flags = AI_PASSIVE | AI_NUMERICHOST + }; + + hints.ai_family = pf; + + struct addrinfo *a, *addr; + int one = 1; + + xgetaddrinfo(listenaddr, port, &hints, &addr); + hints.ai_family = pf; + + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, cfg_sock_proto); + if (sock < 0) + continue; + + if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, + sizeof(one))) + perror("setsockopt"); + + if (bind(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("bind"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + + if (sock < 0) { + fprintf(stderr, "Could not create listen socket\n"); + return sock; + } + + if (listen(sock, 20)) { + perror("listen"); + close(sock); + return -1; + } + + return sock; +} + +static bool sock_test_tcpulp(const 
char * const remoteaddr, + const char * const port) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1, ret = 0; + bool test_pass = false; + + hints.ai_family = AF_INET; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP); + if (sock < 0) { + perror("socket"); + continue; + } + ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp", + sizeof("mptcp")); + if (ret == -1 && errno == EOPNOTSUPP) + test_pass = true; + close(sock); + + if (test_pass) + break; + if (!ret) + fprintf(stderr, + "setsockopt(TCP_ULP) returned 0\n"); + else + perror("setsockopt(TCP_ULP)"); + } + return test_pass; +} + +static int sock_connect_mptcp(const char * const remoteaddr, + const char * const port, int proto) +{ + struct addrinfo hints = { + .ai_protocol = IPPROTO_TCP, + .ai_socktype = SOCK_STREAM, + }; + struct addrinfo *a, *addr; + int sock = -1; + + hints.ai_family = pf; + + xgetaddrinfo(remoteaddr, port, &hints, &addr); + for (a = addr; a; a = a->ai_next) { + sock = socket(a->ai_family, a->ai_socktype, proto); + if (sock < 0) { + perror("socket"); + continue; + } + + if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) + break; /* success */ + + perror("connect()"); + close(sock); + sock = -1; + } + + freeaddrinfo(addr); + return sock; +} + +static size_t do_rnd_write(const int fd, char *buf, const size_t len) +{ + unsigned int do_w; + ssize_t bw; + + do_w = rand() & 0xffff; + if (do_w == 0 || do_w > len) + do_w = len; + + bw = write(fd, buf, do_w); + if (bw < 0) + perror("write"); + + return bw; +} + +static size_t do_write(const int fd, char *buf, const size_t len) +{ + size_t offset = 0; + + while (offset < len) { + size_t written; + ssize_t bw; + + bw = write(fd, buf + offset, len - offset); + if (bw < 0) { + perror("write"); + return 0; + } + + written = (size_t)bw; + offset += written; + } + + 
return offset; +} + +static ssize_t do_rnd_read(const int fd, char *buf, const size_t len) +{ + size_t cap = rand(); + + cap &= 0xffff; + + if (cap == 0) + cap = 1; + else if (cap > len) + cap = len; + + return read(fd, buf, cap); +} + +static void set_nonblock(int fd) +{ + int flags = fcntl(fd, F_GETFL); + + if (flags == -1) + return; + + fcntl(fd, F_SETFL, flags | O_NONBLOCK); +} + +static int copyfd_io_poll(int infd, int peerfd, int outfd) +{ + struct pollfd fds = { + .fd = peerfd, + .events = POLLIN | POLLOUT, + }; + unsigned int woff = 0, wlen = 0; + char wbuf[8192]; + + set_nonblock(peerfd); + + for (;;) { + char rbuf[8192]; + ssize_t len; + + if (fds.events == 0) + break; + + switch (poll(&fds, 1, poll_timeout)) { + case -1: + if (errno == EINTR) + continue; + perror("poll"); + return 1; + case 0: + fprintf(stderr, "%s: poll timed out (events: " + "POLLIN %u, POLLOUT %u)\n", __func__, + fds.events & POLLIN, fds.events & POLLOUT); + return 2; + } + + if (fds.revents & POLLIN) { + len = do_rnd_read(peerfd, rbuf, sizeof(rbuf)); + if (len == 0) { + /* no more data to receive: + * peer has closed its write side + */ + fds.events &= ~POLLIN; + + if ((fds.events & POLLOUT) == 0) + /* and nothing more to send */ + break; + + /* Else, still have data to transmit */ + } else if (len < 0) { + perror("read"); + return 3; + } + + do_write(outfd, rbuf, len); + } + + if (fds.revents & POLLOUT) { + if (wlen == 0) { + woff = 0; + wlen = read(infd, wbuf, sizeof(wbuf)); + } + + if (wlen > 0) { + ssize_t bw; + + bw = do_rnd_write(peerfd, wbuf + woff, wlen); + if (bw < 0) + return 111; + + woff += bw; + wlen -= bw; + } else if (wlen == 0) { + /* We have no more data to send. */ + fds.events &= ~POLLOUT; + + if ((fds.events & POLLIN) == 0) + /* ... and peer also closed already */ + break; + + /* ... but we still receive. + * Close our write side. 
+ */ + shutdown(peerfd, SHUT_WR); + } else { + if (errno == EINTR) + continue; + perror("read"); + return 4; + } + } + + if (fds.revents & (POLLERR | POLLNVAL)) { + fprintf(stderr, "Unexpected revents: " + "POLLERR/POLLNVAL(%x)\n", fds.revents); + return 5; + } + } + + close(peerfd); + return 0; +} + +static int do_recvfile(int infd, int outfd) +{ + ssize_t r; + + do { + char buf[16384]; + + r = do_rnd_read(infd, buf, sizeof(buf)); + if (r > 0) { + if (write(outfd, buf, r) != r) + break; + } else if (r < 0) { + perror("read"); + } + } while (r > 0); + + return (int)r; +} + +static int do_mmap(int infd, int outfd, unsigned int size) +{ + char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0); + ssize_t ret = 0, off = 0; + size_t rem; + + if (inbuf == MAP_FAILED) { + perror("mmap"); + return 1; + } + + rem = size; + + while (rem > 0) { + ret = write(outfd, inbuf + off, rem); + + if (ret < 0) { + perror("write"); + break; + } + + off += ret; + rem -= ret; + } + + munmap(inbuf, size); + return rem; +} + +static int get_infd_size(int fd) +{ + struct stat sb; + ssize_t count; + int err; + + err = fstat(fd, &sb); + if (err < 0) { + perror("fstat"); + return -1; + } + + if ((sb.st_mode & S_IFMT) != S_IFREG) { + fprintf(stderr, "%s: stdin is not a regular file\n", __func__); + return -2; + } + + count = sb.st_size; + if (count > INT_MAX) { + fprintf(stderr, "File too large: %zu\n", count); + return -3; + } + + return (int)count; +} + +static int do_sendfile(int infd, int outfd, unsigned int count) +{ + while (count > 0) { + ssize_t r; + + r = sendfile(outfd, infd, NULL, count); + if (r < 0) { + perror("sendfile"); + return 3; + } + + count -= r; + } + + return 0; +} + +static int copyfd_io_mmap(int infd, int peerfd, int outfd, + unsigned int size) +{ + int err; + + if (listen_mode) { + err = do_recvfile(peerfd, outfd); + if (err) + return err; + + err = do_mmap(infd, peerfd, size); + } else { + err = do_mmap(infd, peerfd, size); + if (err) + return err; + + 
shutdown(peerfd, SHUT_WR);
+
+		err = do_recvfile(peerfd, outfd);
+	}
+
+	return err;
+}
+
+/*
+ * "sendfile" mode: the listener first drains the peer into outfd and then
+ * sends its own input with sendfile(); the connector does the reverse.
+ * Returns 0 on success, otherwise the do_sendfile()/do_recvfile() error.
+ */
+static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
+			      unsigned int size)
+{
+	int err;
+
+	if (listen_mode) {
+		err = do_recvfile(peerfd, outfd);
+		if (err)
+			return err;
+
+		err = do_sendfile(infd, peerfd, size);
+	} else {
+		err = do_sendfile(infd, peerfd, size);
+		if (err)
+			return err;
+
+		/* Half-close, as copyfd_io_mmap() does: the listener's
+		 * do_recvfile() only returns once it reads EOF.
+		 */
+		shutdown(peerfd, SHUT_WR);
+
+		err = do_recvfile(peerfd, outfd);
+	}
+
+	return err;
+}
+
+/*
+ * Dispatch the transfer according to cfg_mode.  The mmap and sendfile modes
+ * need the size of the input file up front; a negative get_infd_size()
+ * result is returned unchanged.
+ */
+static int copyfd_io(int infd, int peerfd, int outfd)
+{
+	int file_size;
+
+	switch (cfg_mode) {
+	case CFG_MODE_POLL:
+		return copyfd_io_poll(infd, peerfd, outfd);
+	case CFG_MODE_MMAP:
+		file_size = get_infd_size(infd);
+		if (file_size < 0)
+			return file_size;
+		return copyfd_io_mmap(infd, peerfd, outfd, file_size);
+	case CFG_MODE_SENDFILE:
+		file_size = get_infd_size(infd);
+		if (file_size < 0)
+			return file_size;
+		return copyfd_io_sendfile(infd, peerfd, outfd, file_size);
+	}
+
+	fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+
+	die_usage();
+	return 1;
+}
+
+/*
+ * Sanity-check the address filled in by accept(): non-zero source port,
+ * length matching the address family, and ss_family equal to pf.
+ * Problems are only reported on stderr; nothing is returned.
+ */
+static void check_sockaddr(int pf, struct sockaddr_storage *ss,
+			   socklen_t salen)
+{
+	struct sockaddr_in6 *sin6;
+	struct sockaddr_in *sin;
+	socklen_t wanted_size = 0;
+
+	switch (pf) {
+	case AF_INET:
+		wanted_size = sizeof(*sin);
+		sin = (void *)ss;
+		if (!sin->sin_port)
+			fprintf(stderr, "accept: something wrong: ip connection from port 0\n");
+		break;
+	case AF_INET6:
+		wanted_size = sizeof(*sin6);
+		sin6 = (void *)ss;
+		if (!sin6->sin6_port)
+			fprintf(stderr, "accept: something wrong: ipv6 connection from port 0\n");
+		break;
+	default:
+		fprintf(stderr, "accept: Unknown pf %d, salen %u\n", pf, salen);
+		return;
+	}
+
+	if (salen != wanted_size)
+		fprintf(stderr, "accept: size mismatch, got %d expected %d\n",
+			(int)salen, (int)wanted_size);
+
+	/* arguments follow the message: expected pf first, then ss_family */
+	if (ss->ss_family != pf)
+		fprintf(stderr, "accept: pf mismatch, expect %d, ss_family is %d\n",
+			pf, (int)ss->ss_family);
+}
+
+static void check_getpeername(int fd, struct sockaddr_storage *ss, socklen_t salen)
+{ + struct sockaddr_storage peerss; + socklen_t peersalen = sizeof(peerss); + + if (getpeername(fd, (struct sockaddr *)&peerss, &peersalen) < 0) { + perror("getpeername"); + return; + } + + if (peersalen != salen) { + fprintf(stderr, "%s: %d vs %d\n", __func__, peersalen, salen); + return; + } + + if (memcmp(ss, &peerss, peersalen)) { + char a[INET6_ADDRSTRLEN]; + char b[INET6_ADDRSTRLEN]; + char c[INET6_ADDRSTRLEN]; + char d[INET6_ADDRSTRLEN]; + + xgetnameinfo((struct sockaddr *)ss, salen, + a, sizeof(a), b, sizeof(b)); + + xgetnameinfo((struct sockaddr *)&peerss, peersalen, + c, sizeof(c), d, sizeof(d)); + + fprintf(stderr, "%s: memcmp failure: accept %s vs peername %s, %s vs %s salen %d vs %d\n", + __func__, a, c, b, d, peersalen, salen); + } +} + +static void check_getpeername_connect(int fd) +{ + struct sockaddr_storage ss; + socklen_t salen = sizeof(ss); + char a[INET6_ADDRSTRLEN]; + char b[INET6_ADDRSTRLEN]; + + if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { + perror("getpeername"); + return; + } + + xgetnameinfo((struct sockaddr *)&ss, salen, + a, sizeof(a), b, sizeof(b)); + + if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) + fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, + cfg_host, a, cfg_port, b); +} + +static void maybe_close(int fd) +{ + unsigned int r = rand(); + + if (r & 1) + close(fd); +} + +int main_loop_s(int listensock) +{ + struct sockaddr_storage ss; + struct pollfd polls; + socklen_t salen; + int remotesock; + + polls.fd = listensock; + polls.events = POLLIN; + + switch (poll(&polls, 1, poll_timeout)) { + case -1: + perror("poll"); + return 1; + case 0: + fprintf(stderr, "%s: timed out\n", __func__); + close(listensock); + return 2; + } + + salen = sizeof(ss); + remotesock = accept(listensock, (struct sockaddr *)&ss, &salen); + if (remotesock >= 0) { + maybe_close(listensock); + check_sockaddr(pf, &ss, salen); + check_getpeername(remotesock, &ss, salen); + + return copyfd_io(0, remotesock, 1); + } + + perror("accept"); + + 
return 1;
+}
+
+/*
+ * Seed rand() from /dev/urandom.  If the device cannot be opened or read,
+ * fall back to the process id so that srand() never sees an uninitialized
+ * value.
+ */
+static void init_rng(void)
+{
+	unsigned int seed = (unsigned int)getpid();
+	int fd = open("/dev/urandom", O_RDONLY);
+
+	/* 0 is a valid descriptor, only a negative value means failure */
+	if (fd >= 0) {
+		unsigned int rnd;
+
+		if (read(fd, &rnd, sizeof(rnd)) == (ssize_t)sizeof(rnd))
+			seed = rnd;
+		close(fd);
+	}
+
+	srand(seed);
+}
+
+/*
+ * Connector side: connect to cfg_host:cfg_port, apply the optional send
+ * buffer size and run the transfer between stdin/stdout and the socket.
+ * Returns 2 if the connection fails, otherwise the copyfd_io() result.
+ */
+int main_loop(void)
+{
+	int fd;
+
+	/* listener is ready. */
+	fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
+	if (fd < 0)
+		return 2;
+
+	check_getpeername_connect(fd);
+
+	if (cfg_sndbuf)
+		set_sndbuf(fd, cfg_sndbuf);
+
+	return copyfd_io(0, fd, 1);
+}
+
+/*
+ * Map the -s argument ("MPTCP" or "TCP", case-insensitive) to an IPPROTO_*
+ * value.  Any other string prints an error and calls die_usage().
+ */
+int parse_proto(const char *proto)
+{
+	if (!strcasecmp(proto, "MPTCP"))
+		return IPPROTO_MPTCP;
+	if (!strcasecmp(proto, "TCP"))
+		return IPPROTO_TCP;
+
+	fprintf(stderr, "Unknown protocol: %s\n", proto);
+	die_usage();
+
+	/* silence compiler warning */
+	return 0;
+}
+
+/*
+ * Map the -m argument ("poll", "mmap" or "sendfile", case-insensitive) to a
+ * CFG_MODE_* value.  Any other string prints the list of supported modes and
+ * calls die_usage().
+ */
+int parse_mode(const char *mode)
+{
+	if (!strcasecmp(mode, "poll"))
+		return CFG_MODE_POLL;
+	if (!strcasecmp(mode, "mmap"))
+		return CFG_MODE_MMAP;
+	if (!strcasecmp(mode, "sendfile"))
+		return CFG_MODE_SENDFILE;
+
+	fprintf(stderr, "Unknown test mode: %s\n", mode);
+	fprintf(stderr, "Supported modes are:\n");
+	fprintf(stderr, "\t\t\"poll\" - interleaved read/write using poll()\n");
+	fprintf(stderr, "\t\t\"mmap\" - send entire input file (mmap+write), then read response (-l will read input first)\n");
+	fprintf(stderr, "\t\t\"sendfile\" - send entire input file (sendfile), then read response (-l will read input first)\n");
+
+	die_usage();
+
+	/* silence compiler warning */
+	return 0;
+}
+
+/*
+ * Parse the -b argument as a send buffer size in the range 0..INT_MAX.
+ * Stores the value in cfg_sndbuf and also returns it; malformed or
+ * out-of-range input prints an error and calls die_usage().
+ */
+int parse_sndbuf(const char *size)
+{
+	unsigned long s;
+	char *end;
+
+	errno = 0;
+
+	s = strtoul(size, &end, 0);
+
+	if (errno) {
+		fprintf(stderr, "Invalid sndbuf size %s (%s)\n",
+			size, strerror(errno));
+		die_usage();
+	}
+
+	/* no digits at all, or trailing garbage after the number */
+	if (end == size || *end != '\0') {
+		fprintf(stderr, "Invalid sndbuf size %s (%s)\n",
+			size, strerror(EINVAL));
+		die_usage();
+	}
+
+	if (s > INT_MAX) {
+		fprintf(stderr, "Invalid sndbuf size %s (%s)\n",
+			size, strerror(ERANGE));
+		die_usage();
+	}
+
+	cfg_sndbuf = s;
+
+	/* parse_opts() stores the return value in cfg_sndbuf, so it has to
+	 * be the parsed size and not a status code.
+	 */
+	return (int)s;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) {
+
switch (c) { + case 'l': + listen_mode = true; + break; + case 'p': + cfg_port = optarg; + break; + case 's': + cfg_sock_proto = parse_proto(optarg); + break; + case 'h': + die_usage(); + break; + case 'u': + tcpulp_audit = true; + break; + case '6': + pf = AF_INET6; + break; + case 't': + poll_timeout = atoi(optarg) * 1000; + if (poll_timeout <= 0) + poll_timeout = -1; + break; + case 'm': + cfg_mode = parse_mode(optarg); + break; + case 'b': + cfg_sndbuf = parse_sndbuf(optarg); + break; + } + } + + if (optind + 1 != argc) + die_usage(); + cfg_host = argv[optind]; + + if (strchr(cfg_host, ':')) + pf = AF_INET6; +} + +int main(int argc, char *argv[]) +{ + init_rng(); + + parse_opts(argc, argv); + + if (tcpulp_audit) + return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1; + + if (listen_mode) { + int fd = sock_listen_mptcp(cfg_host, cfg_port); + + if (fd < 0) + return 1; + + if (cfg_sndbuf) + set_sndbuf(fd, cfg_sndbuf); + + return main_loop_s(fd); + } + + return main_loop(); +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh new file mode 100755 index 000000000000..d573a0feb98d --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -0,0 +1,595 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +time_start=$(date +%s) + +optstring="b:d:e:l:r:h4cm:" +ret=0 +sin="" +sout="" +cin="" +cout="" +ksft_skip=4 +capture=false +timeout=30 +ipv6=true +ethtool_random_on=true +tc_delay="$((RANDOM%400))" +tc_loss=$((RANDOM%101)) +tc_reorder="" +testmode="" +sndbuf=0 +options_log=true + +if [ $tc_loss -eq 100 ];then + tc_loss=1% +elif [ $tc_loss -ge 10 ]; then + tc_loss=0.$tc_loss% +elif [ $tc_loss -ge 1 ]; then + tc_loss=0.0$tc_loss% +else + tc_loss="" +fi + +usage() { + echo "Usage: $0 [ -a ]" + echo -e "\t-d: tc/netem delay in milliseconds, e.g. \"-d 10\" (default random)" + echo -e "\t-l: tc/netem loss percentage, e.g. 
\"-l 0.02\" (default random)" + echo -e "\t-r: tc/netem reorder mode, e.g. \"-r 25% 50% gap 5\", use "-r 0" to disable reordering (default random)" + echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" + echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" + echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-b: set sndbuf value (default: use kernel default)" + echo -e "\t-m: test mode (poll, sendfile; default: poll)" +} + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "d") + if [ $OPTARG -ge 0 ];then + tc_delay="$OPTARG" + else + echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "e") + ethtool_args="$ethtool_args $OPTARG off" + ethtool_random_on=false + ;; + "l") + tc_loss="$OPTARG" + ;; + "r") + tc_reorder="$OPTARG" + ;; + "4") + ipv6=false + ;; + "c") + capture=true + ;; + "b") + if [ $OPTARG -ge 0 ];then + sndbuf="$OPTARG" + else + echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "m") + testmode="$OPTARG" + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" +ns4="ns4-$rndh" + +TEST_COUNT=0 + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + rm -f "$capout" + + local netns + for netns in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? 
-ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +capout=$(mktemp) +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +# "$ns1" ns2 ns3 ns4 +# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3 +# - drop 1% -> reorder 25% +# <- TSO off - + +ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" +ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth2 netns "$ns3" +ip link add ns3eth4 netns "$ns3" type veth peer name ns4eth3 netns "$ns4" + +ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2 +ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad + +ip -net "$ns1" link set ns1eth2 up +ip -net "$ns1" route add default via 10.0.1.2 +ip -net "$ns1" route add default via dead:beef:1::2 + +ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 +ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad +ip -net "$ns2" link set ns2eth1 up + +ip -net "$ns2" addr add 10.0.2.1/24 dev ns2eth3 +ip -net "$ns2" addr add dead:beef:2::1/64 dev ns2eth3 nodad +ip -net "$ns2" link set ns2eth3 up +ip -net "$ns2" route add default via 10.0.2.2 +ip -net "$ns2" route add default via dead:beef:2::2 +ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1 +ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1 + +ip -net "$ns3" addr add 10.0.2.2/24 dev ns3eth2 +ip -net "$ns3" addr add dead:beef:2::2/64 dev ns3eth2 nodad +ip -net "$ns3" link set ns3eth2 up + +ip -net "$ns3" addr add 10.0.3.2/24 dev ns3eth4 +ip -net "$ns3" addr add dead:beef:3::2/64 dev ns3eth4 nodad +ip -net "$ns3" link set ns3eth4 up +ip -net "$ns3" route add default via 10.0.2.1 +ip -net "$ns3" route add default via dead:beef:2::1 +ip netns exec "$ns3" sysctl -q net.ipv4.ip_forward=1 +ip netns exec "$ns3" sysctl -q net.ipv6.conf.all.forwarding=1 + +ip -net "$ns4" addr add 10.0.3.1/24 dev ns4eth3 +ip -net "$ns4" addr add 
dead:beef:3::1/64 dev ns4eth3 nodad +ip -net "$ns4" link set ns4eth3 up +ip -net "$ns4" route add default via 10.0.3.2 +ip -net "$ns4" route add default via dead:beef:3::2 + +set_ethtool_flags() { + local ns="$1" + local dev="$2" + local flags="$3" + + ip netns exec $ns ethtool -K $dev $flags 2>/dev/null + [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags" +} + +set_random_ethtool_flags() { + local flags="" + local r=$RANDOM + + local pick1=$((r & 1)) + local pick2=$((r & 2)) + local pick3=$((r & 4)) + + [ $pick1 -ne 0 ] && flags="tso off" + [ $pick2 -ne 0 ] && flags="$flags gso off" + [ $pick3 -ne 0 ] && flags="$flags gro off" + + [ -z "$flags" ] && return + + set_ethtool_flags "$1" "$2" "$flags" +} + +if $ethtool_random_on;then + set_random_ethtool_flags "$ns3" ns3eth2 + set_random_ethtool_flags "$ns4" ns4eth3 +else + set_ethtool_flags "$ns3" ns3eth2 "$ethtool_args" + set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args" +fi + +print_file_err() +{ + ls -l "$1" 1>&2 + echo "Trailing bytes are: " + tail -c 27 "$1" +} + +check_transfer() +{ + local in=$1 + local out=$2 + local what=$3 + + cmp "$in" "$out" > /dev/null 2>&1 + if [ $? 
-ne 0 ] ;then
+		echo "[ FAIL ] $what does not match (in, out):"
+		print_file_err "$in"
+		print_file_err "$out"
+
+		return 1
+	fi
+
+	return 0
+}
+
+# Verify that net.mptcp.enabled defaults to 1 in a fresh netns and that
+# setting it to 0 makes creation of an MPTCP socket fail.
+# Sets ret=1 and returns 1 on failure, returns 0 on success.
+check_mptcp_disabled()
+{
+	local disabled_ns
+	# $rndh is the per-run random suffix also used for ns1..ns4
+	disabled_ns="ns_disabled-$rndh-$(mktemp -u XXXXXX)"
+	ip netns add ${disabled_ns} || exit $ksft_skip
+
+	# net.mptcp.enabled should be enabled by default
+	if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
+		echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
+		# do not leave the temporary netns behind on this path
+		ip netns delete ${disabled_ns}
+		ret=1
+		return 1
+	fi
+	ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
+
+	local err=0
+	LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+		grep -q "^socket: Protocol not available$" && err=1
+	ip netns delete ${disabled_ns}
+
+	if [ ${err} -eq 0 ]; then
+		echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
+		ret=1
+		return 1
+	fi
+
+	echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+	return 0
+}
+
+# Verify that setsockopt(TCP_ULP, "mptcp") on a plain TCP socket is rejected
+# (mptcp_connect -u exits 0 in that case).
+# Sets ret=1 and returns 1 on failure, returns 0 on success.
+check_mptcp_ulp_setsockopt()
+{
+	local t retval
+	# $rndh is the per-run random suffix also used for ns1..ns4
+	t="ns_ulp-$rndh-$(mktemp -u XXXXXX)"
+
+	ip netns add ${t} || exit $ksft_skip
+	if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
+		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
+		retval=1
+		ret=$retval
+	else
+		printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
+		retval=0
+	fi
+	ip netns del ${t}
+	return $retval
+}
+
+# $1: IP address
+# Succeeds when the address contains a ':' (i.e. is IPv6).
+is_v6()
+{
+	[ -z "${1##*:*}" ]
+}
+
+# $1: listener netns (used in the failure message only)
+# $2: netns the ping is sent from
+# $3: address to ping; IPv6 addresses are skipped when $ipv6 is false
+do_ping()
+{
+	local listener_ns="$1"
+	local connector_ns="$2"
+	local connect_addr="$3"
+	local ping_args="-q -c 1"
+
+	if is_v6 "${connect_addr}"; then
+		$ipv6 || return 0
+		ping_args="${ping_args} -6"
+	fi
+
+	ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null
+	if [ $? 
-ne 0 ] ; then + echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + + return 1 + fi + + return 0 +} + +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local local_addr="$6" + local extra_args="" + + local port + port=$((10000+$TEST_COUNT)) + TEST_COUNT=$((TEST_COUNT+1)) + + if [ "$sndbuf" -gt 0 ]; then + extra_args="$extra_args -b $sndbuf" + fi + + if [ -n "$testmode" ]; then + extra_args="$extra_args -m $testmode" + fi + + if [ -n "$extra_args" ] && $options_log; then + options_log=false + echo "INFO: extra options: $extra_args" + fi + + :> "$cout" + :> "$sout" + :> "$capout" + + local addr_port + addr_port=$(printf "%s:%d" ${connect_addr} ${port}) + printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto} + + if $capture; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap" + + ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + local cappid=$! + + sleep 1 + fi + + ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" & + local spid=$! 
+ + wait_local_port_listen "${listener_ns}" "${port}" + + local start + start=$(date +%s%3N) + ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" & + local cpid=$! + + wait $cpid + local retc=$? + wait $spid + local rets=$? + + local stop + stop=$(date +%s%3N) + + if $capture; then + sleep 1 + kill $cappid + fi + + local duration + duration=$((stop-start)) + duration=$(printf "(duration %05sms)" $duration) + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 + echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + + cat "$capout" + return 1 + fi + + check_transfer $sin $cout "file received by client" + retc=$? + check_transfer $cin $sout "file received by server" + rets=$? + + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + echo "$duration [ OK ]" + cat "$capout" + return 0 + fi + + cat "$capout" + return 1 +} + +make_file() +{ + local name=$1 + local who=$2 + + local SIZE TSIZE + SIZE=$((RANDOM % (1024 * 8))) + TSIZE=$((SIZE * 1024)) + + dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + + SIZE=$((RANDOM % 1024)) + SIZE=$((SIZE + 128)) + TSIZE=$((TSIZE + SIZE)) + dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + + echo "Created $name (size $TSIZE) containing data sent by $who" +} + +run_tests_lo() +{ + local listener_ns="$1" + local connector_ns="$2" + local connect_addr="$3" + local loopback="$4" + local lret=0 + + # skip if test programs are running inside same netns for subsequent runs. + if [ $loopback -eq 0 ] && [ ${listener_ns} = ${connector_ns} ]; then + return 0 + fi + + # skip if we don't want v6 + if ! 
$ipv6 && is_v6 "${connect_addr}"; then + return 0 + fi + + local local_addr + if is_v6 "${connect_addr}"; then + local_addr="::" + else + local_addr="0.0.0.0" + fi + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + + # don't bother testing fallback tcp except for loopback case. + if [ ${listener_ns} != ${connector_ns} ]; then + return 0 + fi + + do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + + do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + + return 0 +} + +run_tests() +{ + run_tests_lo $1 $2 $3 0 +} + +make_file "$cin" "client" +make_file "$sin" "server" + +check_mptcp_disabled + +check_mptcp_ulp_setsockopt + +echo "INFO: validating network environment with pings" +for sender in "$ns1" "$ns2" "$ns3" "$ns4";do + do_ping "$ns1" $sender 10.0.1.1 + do_ping "$ns1" $sender dead:beef:1::1 + + do_ping "$ns2" $sender 10.0.1.2 + do_ping "$ns2" $sender dead:beef:1::2 + do_ping "$ns2" $sender 10.0.2.1 + do_ping "$ns2" $sender dead:beef:2::1 + + do_ping "$ns3" $sender 10.0.2.2 + do_ping "$ns3" $sender dead:beef:2::2 + do_ping "$ns3" $sender 10.0.3.2 + do_ping "$ns3" $sender dead:beef:3::2 + + do_ping "$ns4" $sender 10.0.3.1 + do_ping "$ns4" $sender dead:beef:3::1 +done + +[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss +echo -n "INFO: Using loss of $tc_loss " +test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " + +if [ -z "${tc_reorder}" ]; then + reorder1=$((RANDOM%10)) + reorder1=$((100 - reorder1)) + reorder2=$((RANDOM%100)) + + if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then + tc_reorder="reorder ${reorder1}% ${reorder2}%" + echo -n "$tc_reorder " + fi +elif [ "$tc_reorder" = "0" ];then + 
tc_reorder="" +elif [ "$tc_delay" -gt 0 ];then + # reordering requires some delay + tc_reorder="reorder $tc_reorder" + echo -n "$tc_reorder " +fi + +echo "on ns3eth4" + +tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder + +for sender in $ns1 $ns2 $ns3 $ns4;do + run_tests_lo "$ns1" "$sender" 10.0.1.1 1 + if [ $ret -ne 0 ] ;then + echo "FAIL: Could not even run loopback test" 1>&2 + exit $ret + fi + run_tests_lo "$ns1" $sender dead:beef:1::1 1 + if [ $ret -ne 0 ] ;then + echo "FAIL: Could not even run loopback v6 test" 2>&1 + exit $ret + fi + + run_tests "$ns2" $sender 10.0.1.2 + run_tests "$ns2" $sender dead:beef:1::2 + run_tests "$ns2" $sender 10.0.2.1 + run_tests "$ns2" $sender dead:beef:2::1 + + run_tests "$ns3" $sender 10.0.2.2 + run_tests "$ns3" $sender dead:beef:2::2 + run_tests "$ns3" $sender 10.0.3.2 + run_tests "$ns3" $sender dead:beef:3::2 + + run_tests "$ns4" $sender 10.0.3.1 + run_tests "$ns4" $sender dead:beef:3::1 +done + +time_end=$(date +%s) +time_run=$((time_end-time_start)) + +echo "Time: ${time_run} seconds" + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings new file mode 100644 index 000000000000..026384c189c9 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/settings @@ -0,0 +1 @@ +timeout=450 diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c new file mode 100644 index 000000000000..93208caacbe6 --- /dev/null +++ b/tools/testing/selftests/net/nettest.c @@ -0,0 +1,1813 @@ +// SPDX-License-Identifier: GPL-2.0 +/* nettest - used for functional tests of networking APIs + * + * Copyright (c) 2013-2019 David Ahern <dsahern@gmail.com>. All rights reserved. 
+ */ + +#define _GNU_SOURCE +#include <features.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <linux/tcp.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netdb.h> +#include <fcntl.h> +#include <libgen.h> +#include <limits.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <time.h> +#include <errno.h> + +#ifndef IPV6_UNICAST_IF +#define IPV6_UNICAST_IF 76 +#endif +#ifndef IPV6_MULTICAST_IF +#define IPV6_MULTICAST_IF 17 +#endif + +#define DEFAULT_PORT 12345 + +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif + +struct sock_args { + /* local address */ + union { + struct in_addr in; + struct in6_addr in6; + } local_addr; + + /* remote address */ + union { + struct in_addr in; + struct in6_addr in6; + } remote_addr; + int scope_id; /* remote scope; v6 send only */ + + struct in_addr grp; /* multicast group */ + + unsigned int has_local_ip:1, + has_remote_ip:1, + has_grp:1, + has_expected_laddr:1, + has_expected_raddr:1, + bind_test_only:1; + + unsigned short port; + + int type; /* DGRAM, STREAM, RAW */ + int protocol; + int version; /* AF_INET/AF_INET6 */ + + int use_setsockopt; + int use_cmsg; + const char *dev; + int ifindex; + + const char *password; + /* prefix for MD5 password */ + union { + struct sockaddr_in v4; + struct sockaddr_in6 v6; + } md5_prefix; + unsigned int prefix_len; + + /* expected addresses and device index for connection */ + int expected_ifindex; + + /* local address */ + union { + struct in_addr in; + struct in6_addr in6; + } expected_laddr; + + /* remote address */ + union { + struct in_addr in; + struct in6_addr in6; + } expected_raddr; +}; + +static int server_mode; +static unsigned int prog_timeout = 5; +static unsigned int interactive; +static int iter = 1; +static char *msg = "Hello world!"; +static int msglen; +static int 
quiet; +static int try_broadcast = 1; + +static char *timestamp(char *timebuf, int buflen) +{ + time_t now; + + now = time(NULL); + if (strftime(timebuf, buflen, "%T", localtime(&now)) == 0) { + memset(timebuf, 0, buflen); + strncpy(timebuf, "00:00:00", buflen-1); + } + + return timebuf; +} + +static void log_msg(const char *format, ...) +{ + char timebuf[64]; + va_list args; + + if (quiet) + return; + + fprintf(stdout, "%s %s:", + timestamp(timebuf, sizeof(timebuf)), + server_mode ? "server" : "client"); + va_start(args, format); + vfprintf(stdout, format, args); + va_end(args); + + fflush(stdout); +} + +static void log_error(const char *format, ...) +{ + char timebuf[64]; + va_list args; + + if (quiet) + return; + + fprintf(stderr, "%s %s:", + timestamp(timebuf, sizeof(timebuf)), + server_mode ? "server" : "client"); + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + fflush(stderr); +} + +static void log_err_errno(const char *fmt, ...) +{ + char timebuf[64]; + va_list args; + + if (quiet) + return; + + fprintf(stderr, "%s %s: ", + timestamp(timebuf, sizeof(timebuf)), + server_mode ? 
"server" : "client"); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + + fprintf(stderr, ": %d: %s\n", errno, strerror(errno)); + fflush(stderr); +} + +static void log_address(const char *desc, struct sockaddr *sa) +{ + char addrstr[64]; + + if (quiet) + return; + + if (sa->sa_family == AF_INET) { + struct sockaddr_in *s = (struct sockaddr_in *) sa; + + log_msg("%s %s:%d", + desc, + inet_ntop(AF_INET, &s->sin_addr, addrstr, + sizeof(addrstr)), + ntohs(s->sin_port)); + + } else if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; + + log_msg("%s [%s]:%d", + desc, + inet_ntop(AF_INET6, &s6->sin6_addr, addrstr, + sizeof(addrstr)), + ntohs(s6->sin6_port)); + } + + printf("\n"); + + fflush(stdout); +} + +static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args) +{ + int keylen = strlen(args->password); + struct tcp_md5sig md5sig = {}; + int opt = TCP_MD5SIG; + int rc; + + md5sig.tcpm_keylen = keylen; + memcpy(md5sig.tcpm_key, args->password, keylen); + + if (args->prefix_len) { + opt = TCP_MD5SIG_EXT; + md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_PREFIX; + + md5sig.tcpm_prefixlen = args->prefix_len; + addr = &args->md5_prefix; + } + memcpy(&md5sig.tcpm_addr, addr, alen); + + if (args->ifindex) { + opt = TCP_MD5SIG_EXT; + md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; + + md5sig.tcpm_ifindex = args->ifindex; + } + + rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig)); + if (rc < 0) { + /* ENOENT is harmless. 
Returned when a password is cleared */ + if (errno == ENOENT) + rc = 0; + else + log_err_errno("setsockopt(TCP_MD5SIG)"); + } + + return rc; +} + +static int tcp_md5_remote(int sd, struct sock_args *args) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + }; + void *addr; + int alen; + + switch (args->version) { + case AF_INET: + sin.sin_port = htons(args->port); + sin.sin_addr = args->remote_addr.in; + addr = &sin; + alen = sizeof(sin); + break; + case AF_INET6: + sin6.sin6_port = htons(args->port); + sin6.sin6_addr = args->remote_addr.in6; + addr = &sin6; + alen = sizeof(sin6); + break; + default: + log_error("unknown address family\n"); + exit(1); + } + + if (tcp_md5sig(sd, addr, alen, args)) + return -1; + + return 0; +} + +static int get_ifidx(const char *ifname) +{ + struct ifreq ifdata; + int sd, rc; + + if (!ifname || *ifname == '\0') + return -1; + + memset(&ifdata, 0, sizeof(ifdata)); + + strcpy(ifdata.ifr_name, ifname); + + sd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); + if (sd < 0) { + log_err_errno("socket failed"); + return -1; + } + + rc = ioctl(sd, SIOCGIFINDEX, (char *)&ifdata); + close(sd); + if (rc != 0) { + log_err_errno("ioctl(SIOCGIFINDEX) failed"); + return -1; + } + + return ifdata.ifr_ifindex; +} + +static int bind_to_device(int sd, const char *name) +{ + int rc; + + rc = setsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, strlen(name)+1); + if (rc < 0) + log_err_errno("setsockopt(SO_BINDTODEVICE)"); + + return rc; +} + +static int get_bind_to_device(int sd, char *name, size_t len) +{ + int rc; + socklen_t optlen = len; + + name[0] = '\0'; + rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); + if (rc < 0) + log_err_errno("setsockopt(SO_BINDTODEVICE)"); + + return rc; +} + +static int check_device(int sd, struct sock_args *args) +{ + int ifindex = 0; + char name[32]; + + if (get_bind_to_device(sd, name, sizeof(name))) + *name = '\0'; + else + ifindex = 
get_ifidx(name); + + log_msg(" bound to device %s/%d\n", + *name ? name : "<none>", ifindex); + + if (!args->expected_ifindex) + return 0; + + if (args->expected_ifindex != ifindex) { + log_error("Device index mismatch: expected %d have %d\n", + args->expected_ifindex, ifindex); + return 1; + } + + log_msg("Device index matches: expected %d have %d\n", + args->expected_ifindex, ifindex); + + return 0; +} + +static int set_pktinfo_v4(int sd) +{ + int one = 1; + int rc; + + rc = setsockopt(sd, SOL_IP, IP_PKTINFO, &one, sizeof(one)); + if (rc < 0 && rc != -ENOTSUP) + log_err_errno("setsockopt(IP_PKTINFO)"); + + return rc; +} + +static int set_recvpktinfo_v6(int sd) +{ + int one = 1; + int rc; + + rc = setsockopt(sd, SOL_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); + if (rc < 0 && rc != -ENOTSUP) + log_err_errno("setsockopt(IPV6_RECVPKTINFO)"); + + return rc; +} + +static int set_recverr_v4(int sd) +{ + int one = 1; + int rc; + + rc = setsockopt(sd, SOL_IP, IP_RECVERR, &one, sizeof(one)); + if (rc < 0 && rc != -ENOTSUP) + log_err_errno("setsockopt(IP_RECVERR)"); + + return rc; +} + +static int set_recverr_v6(int sd) +{ + int one = 1; + int rc; + + rc = setsockopt(sd, SOL_IPV6, IPV6_RECVERR, &one, sizeof(one)); + if (rc < 0 && rc != -ENOTSUP) + log_err_errno("setsockopt(IPV6_RECVERR)"); + + return rc; +} + +static int set_unicast_if(int sd, int ifindex, int version) +{ + int opt = IP_UNICAST_IF; + int level = SOL_IP; + int rc; + + ifindex = htonl(ifindex); + + if (version == AF_INET6) { + opt = IPV6_UNICAST_IF; + level = SOL_IPV6; + } + rc = setsockopt(sd, level, opt, &ifindex, sizeof(ifindex)); + if (rc < 0) + log_err_errno("setsockopt(IP_UNICAST_IF)"); + + return rc; +} + +static int set_multicast_if(int sd, int ifindex) +{ + struct ip_mreqn mreq = { .imr_ifindex = ifindex }; + int rc; + + rc = setsockopt(sd, SOL_IP, IP_MULTICAST_IF, &mreq, sizeof(mreq)); + if (rc < 0) + log_err_errno("setsockopt(IP_MULTICAST_IF)"); + + return rc; +} + +static int set_membership(int 
sd, uint32_t grp, uint32_t addr, int ifindex) +{ + uint32_t if_addr = addr; + struct ip_mreqn mreq; + int rc; + + if (addr == htonl(INADDR_ANY) && !ifindex) { + log_error("Either local address or device needs to be given for multicast membership\n"); + return -1; + } + + mreq.imr_multiaddr.s_addr = grp; + mreq.imr_address.s_addr = if_addr; + mreq.imr_ifindex = ifindex; + + rc = setsockopt(sd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + if (rc < 0) { + log_err_errno("setsockopt(IP_ADD_MEMBERSHIP)"); + return -1; + } + + return 0; +} + +static int set_broadcast(int sd) +{ + unsigned int one = 1; + int rc = 0; + + if (setsockopt(sd, SOL_SOCKET, SO_BROADCAST, &one, sizeof(one)) != 0) { + log_err_errno("setsockopt(SO_BROADCAST)"); + rc = -1; + } + + return rc; +} + +static int set_reuseport(int sd) +{ + unsigned int one = 1; + int rc = 0; + + if (setsockopt(sd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) != 0) { + log_err_errno("setsockopt(SO_REUSEPORT)"); + rc = -1; + } + + return rc; +} + +static int set_reuseaddr(int sd) +{ + unsigned int one = 1; + int rc = 0; + + if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) != 0) { + log_err_errno("setsockopt(SO_REUSEADDR)"); + rc = -1; + } + + return rc; +} + +static int str_to_uint(const char *str, int min, int max, unsigned int *value) +{ + int number; + char *end; + + errno = 0; + number = (unsigned int) strtoul(str, &end, 0); + + /* entire string should be consumed by conversion + * and value should be between min and max + */ + if (((*end == '\0') || (*end == '\n')) && (end != str) && + (errno != ERANGE) && (min <= number) && (number <= max)) { + *value = number; + return 0; + } + + return -1; +} + +static int expected_addr_match(struct sockaddr *sa, void *expected, + const char *desc) +{ + char addrstr[64]; + int rc = 0; + + if (sa->sa_family == AF_INET) { + struct sockaddr_in *s = (struct sockaddr_in *) sa; + struct in_addr *exp_in = (struct in_addr *) expected; + + if (s->sin_addr.s_addr != 
exp_in->s_addr) { + log_error("%s address does not match expected %s", + desc, + inet_ntop(AF_INET, exp_in, + addrstr, sizeof(addrstr))); + rc = 1; + } + } else if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; + struct in6_addr *exp_in = (struct in6_addr *) expected; + + if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) { + log_error("%s address does not match expected %s", + desc, + inet_ntop(AF_INET6, exp_in, + addrstr, sizeof(addrstr))); + rc = 1; + } + } else { + log_error("%s address does not match expected - unknown family", + desc); + rc = 1; + } + + if (!rc) + log_msg("%s address matches expected\n", desc); + + return rc; +} + +static int show_sockstat(int sd, struct sock_args *args) +{ + struct sockaddr_in6 local_addr, remote_addr; + socklen_t alen = sizeof(local_addr); + struct sockaddr *sa; + const char *desc; + int rc = 0; + + desc = server_mode ? "server local:" : "client local:"; + sa = (struct sockaddr *) &local_addr; + if (getsockname(sd, sa, &alen) == 0) { + log_address(desc, sa); + + if (args->has_expected_laddr) { + rc = expected_addr_match(sa, &args->expected_laddr, + "local"); + } + } else { + log_err_errno("getsockname failed"); + } + + sa = (struct sockaddr *) &remote_addr; + desc = server_mode ? 
"server peer:" : "client peer:"; + if (getpeername(sd, sa, &alen) == 0) { + log_address(desc, sa); + + if (args->has_expected_raddr) { + rc |= expected_addr_match(sa, &args->expected_raddr, + "remote"); + } + } else { + log_err_errno("getpeername failed"); + } + + return rc; +} + +static int get_index_from_cmsg(struct msghdr *m) +{ + struct cmsghdr *cm; + int ifindex = 0; + char buf[64]; + + for (cm = (struct cmsghdr *)CMSG_FIRSTHDR(m); + m->msg_controllen != 0 && cm; + cm = (struct cmsghdr *)CMSG_NXTHDR(m, cm)) { + + if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *pi; + + pi = (struct in_pktinfo *)(CMSG_DATA(cm)); + inet_ntop(AF_INET, &pi->ipi_addr, buf, sizeof(buf)); + ifindex = pi->ipi_ifindex; + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *pi6; + + pi6 = (struct in6_pktinfo *)(CMSG_DATA(cm)); + inet_ntop(AF_INET6, &pi6->ipi6_addr, buf, sizeof(buf)); + ifindex = pi6->ipi6_ifindex; + } + } + + if (ifindex) { + log_msg(" pktinfo: ifindex %d dest addr %s\n", + ifindex, buf); + } + return ifindex; +} + +static int send_msg_no_cmsg(int sd, void *addr, socklen_t alen) +{ + int err; + +again: + err = sendto(sd, msg, msglen, 0, addr, alen); + if (err < 0) { + if (errno == EACCES && try_broadcast) { + try_broadcast = 0; + if (!set_broadcast(sd)) + goto again; + errno = EACCES; + } + + log_err_errno("sendto failed"); + return 1; + } + + return 0; +} + +static int send_msg_cmsg(int sd, void *addr, socklen_t alen, + int ifindex, int version) +{ + unsigned char cmsgbuf[64]; + struct iovec iov[2]; + struct cmsghdr *cm; + struct msghdr m; + int err; + + iov[0].iov_base = msg; + iov[0].iov_len = msglen; + m.msg_iov = iov; + m.msg_iovlen = 1; + m.msg_name = (caddr_t)addr; + m.msg_namelen = alen; + + memset(cmsgbuf, 0, sizeof(cmsgbuf)); + cm = (struct cmsghdr *)cmsgbuf; + m.msg_control = (caddr_t)cm; + + if (version == AF_INET) { + struct in_pktinfo *pi; + + cm->cmsg_level = SOL_IP; + 
cm->cmsg_type = IP_PKTINFO; + cm->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); + pi = (struct in_pktinfo *)(CMSG_DATA(cm)); + pi->ipi_ifindex = ifindex; + + m.msg_controllen = cm->cmsg_len; + + } else if (version == AF_INET6) { + struct in6_pktinfo *pi6; + + cm->cmsg_level = SOL_IPV6; + cm->cmsg_type = IPV6_PKTINFO; + cm->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); + + pi6 = (struct in6_pktinfo *)(CMSG_DATA(cm)); + pi6->ipi6_ifindex = ifindex; + + m.msg_controllen = cm->cmsg_len; + } + +again: + err = sendmsg(sd, &m, 0); + if (err < 0) { + if (errno == EACCES && try_broadcast) { + try_broadcast = 0; + if (!set_broadcast(sd)) + goto again; + errno = EACCES; + } + + log_err_errno("sendmsg failed"); + return 1; + } + + return 0; +} + + +static int send_msg(int sd, void *addr, socklen_t alen, struct sock_args *args) +{ + if (args->type == SOCK_STREAM) { + if (write(sd, msg, msglen) < 0) { + log_err_errno("write failed sending msg to peer"); + return 1; + } + } else if (args->ifindex && args->use_cmsg) { + if (send_msg_cmsg(sd, addr, alen, args->ifindex, args->version)) + return 1; + } else { + if (send_msg_no_cmsg(sd, addr, alen)) + return 1; + } + + log_msg("Sent message:\n"); + log_msg(" %.24s%s\n", msg, msglen > 24 ? " ..." 
: ""); + + return 0; +} + +static int socket_read_dgram(int sd, struct sock_args *args) +{ + unsigned char addr[sizeof(struct sockaddr_in6)]; + struct sockaddr *sa = (struct sockaddr *) addr; + socklen_t alen = sizeof(addr); + struct iovec iov[2]; + struct msghdr m = { + .msg_name = (caddr_t)addr, + .msg_namelen = alen, + .msg_iov = iov, + .msg_iovlen = 1, + }; + unsigned char cmsgbuf[256]; + struct cmsghdr *cm = (struct cmsghdr *)cmsgbuf; + char buf[16*1024]; + int ifindex; + int len; + + iov[0].iov_base = (caddr_t)buf; + iov[0].iov_len = sizeof(buf); + + memset(cmsgbuf, 0, sizeof(cmsgbuf)); + m.msg_control = (caddr_t)cm; + m.msg_controllen = sizeof(cmsgbuf); + + len = recvmsg(sd, &m, 0); + if (len == 0) { + log_msg("peer closed connection.\n"); + return 0; + } else if (len < 0) { + log_msg("failed to read message: %d: %s\n", + errno, strerror(errno)); + return -1; + } + + buf[len] = '\0'; + + log_address("Message from:", sa); + log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : ""); + + ifindex = get_index_from_cmsg(&m); + if (args->expected_ifindex) { + if (args->expected_ifindex != ifindex) { + log_error("Device index mismatch: expected %d have %d\n", + args->expected_ifindex, ifindex); + return -1; + } + log_msg("Device index matches: expected %d have %d\n", + args->expected_ifindex, ifindex); + } + + if (!interactive && server_mode) { + if (sa->sa_family == AF_INET6) { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; + struct in6_addr *in6 = &s6->sin6_addr; + + if (IN6_IS_ADDR_V4MAPPED(in6)) { + const uint32_t *pa = (uint32_t *) &in6->s6_addr; + struct in_addr in4; + struct sockaddr_in *sin; + + sin = (struct sockaddr_in *) addr; + pa += 3; + in4.s_addr = *pa; + sin->sin_addr = in4; + sin->sin_family = AF_INET; + if (send_msg_cmsg(sd, addr, alen, + ifindex, AF_INET) < 0) + goto out_err; + } + } +again: + iov[0].iov_len = len; + + if (args->version == AF_INET6) { + struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa; + + if (args->dev) { + /* avoid PKTINFO 
conflicts with bindtodev */ + if (sendto(sd, buf, len, 0, + (void *) addr, alen) < 0) + goto out_err; + } else { + /* kernel is allowing scope_id to be set to VRF + * index for LLA. for sends to global address + * reset scope id + */ + s6->sin6_scope_id = ifindex; + if (sendmsg(sd, &m, 0) < 0) + goto out_err; + } + } else { + int err; + + err = sendmsg(sd, &m, 0); + if (err < 0) { + if (errno == EACCES && try_broadcast) { + try_broadcast = 0; + if (!set_broadcast(sd)) + goto again; + errno = EACCES; + } + goto out_err; + } + } + log_msg("Sent message:\n"); + log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : ""); + } + + return 1; +out_err: + log_err_errno("failed to send msg to peer"); + return -1; +} + +static int socket_read_stream(int sd) +{ + char buf[1024]; + int len; + + len = read(sd, buf, sizeof(buf)-1); + if (len == 0) { + log_msg("client closed connection.\n"); + return 0; + } else if (len < 0) { + log_msg("failed to read message\n"); + return -1; + } + + buf[len] = '\0'; + log_msg("Incoming message:\n"); + log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : ""); + + if (!interactive && server_mode) { + if (write(sd, buf, len) < 0) { + log_err_errno("failed to send buf"); + return -1; + } + log_msg("Sent message:\n"); + log_msg(" %.24s%s\n", buf, len > 24 ? " ..." 
: ""); + } + + return 1; +} + +static int socket_read(int sd, struct sock_args *args) +{ + if (args->type == SOCK_STREAM) + return socket_read_stream(sd); + + return socket_read_dgram(sd, args); +} + +static int stdin_to_socket(int sd, int type, void *addr, socklen_t alen) +{ + char buf[1024]; + int len; + + if (fgets(buf, sizeof(buf), stdin) == NULL) + return 0; + + len = strlen(buf); + if (type == SOCK_STREAM) { + if (write(sd, buf, len) < 0) { + log_err_errno("failed to send buf"); + return -1; + } + } else { + int err; + +again: + err = sendto(sd, buf, len, 0, addr, alen); + if (err < 0) { + if (errno == EACCES && try_broadcast) { + try_broadcast = 0; + if (!set_broadcast(sd)) + goto again; + errno = EACCES; + } + log_err_errno("failed to send msg to peer"); + return -1; + } + } + log_msg("Sent message:\n"); + log_msg(" %.24s%s\n", buf, len > 24 ? " ..." : ""); + + return 1; +} + +static void set_recv_attr(int sd, int version) +{ + if (version == AF_INET6) { + set_recvpktinfo_v6(sd); + set_recverr_v6(sd); + } else { + set_pktinfo_v4(sd); + set_recverr_v4(sd); + } +} + +static int msg_loop(int client, int sd, void *addr, socklen_t alen, + struct sock_args *args) +{ + struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL; + fd_set rfds; + int nfds; + int rc; + + if (args->type != SOCK_STREAM) + set_recv_attr(sd, args->version); + + if (msg) { + msglen = strlen(msg); + + /* client sends first message */ + if (client) { + if (send_msg(sd, addr, alen, args)) + return 1; + } + if (!interactive) { + ptval = &timeout; + if (!prog_timeout) + timeout.tv_sec = 5; + } + } + + nfds = interactive ? 
MAX(fileno(stdin), sd) + 1 : sd + 1; + while (1) { + FD_ZERO(&rfds); + FD_SET(sd, &rfds); + if (interactive) + FD_SET(fileno(stdin), &rfds); + + rc = select(nfds, &rfds, NULL, NULL, ptval); + if (rc < 0) { + if (errno == EINTR) + continue; + + rc = 1; + log_err_errno("select failed"); + break; + } else if (rc == 0) { + log_error("Timed out waiting for response\n"); + rc = 2; + break; + } + + if (FD_ISSET(sd, &rfds)) { + rc = socket_read(sd, args); + if (rc < 0) { + rc = 1; + break; + } + if (rc == 0) + break; + } + + rc = 0; + + if (FD_ISSET(fileno(stdin), &rfds)) { + if (stdin_to_socket(sd, args->type, addr, alen) <= 0) + break; + } + + if (interactive) + continue; + + if (iter != -1) { + --iter; + if (iter == 0) + break; + } + + log_msg("Going into quiet mode\n"); + quiet = 1; + + if (client) { + if (send_msg(sd, addr, alen, args)) { + rc = 1; + break; + } + } + } + + return rc; +} + +static int msock_init(struct sock_args *args, int server) +{ + uint32_t if_addr = htonl(INADDR_ANY); + struct sockaddr_in laddr = { + .sin_family = AF_INET, + .sin_port = htons(args->port), + }; + int one = 1; + int sd; + + if (!server && args->has_local_ip) + if_addr = args->local_addr.in.s_addr; + + sd = socket(PF_INET, SOCK_DGRAM, 0); + if (sd < 0) { + log_err_errno("socket"); + return -1; + } + + if (setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)) < 0) { + log_err_errno("Setting SO_REUSEADDR error"); + goto out_err; + } + + if (setsockopt(sd, SOL_SOCKET, SO_BROADCAST, + (char *)&one, sizeof(one)) < 0) + log_err_errno("Setting SO_BROADCAST error"); + + if (args->dev && bind_to_device(sd, args->dev) != 0) + goto out_err; + else if (args->use_setsockopt && + set_multicast_if(sd, args->ifindex)) + goto out_err; + + laddr.sin_addr.s_addr = if_addr; + + if (bind(sd, (struct sockaddr *) &laddr, sizeof(laddr)) < 0) { + log_err_errno("bind failed"); + goto out_err; + } + + if (server && + set_membership(sd, args->grp.s_addr, + args->local_addr.in.s_addr, 
args->ifindex)) + goto out_err; + + return sd; +out_err: + close(sd); + return -1; +} + +static int msock_server(struct sock_args *args) +{ + return msock_init(args, 1); +} + +static int msock_client(struct sock_args *args) +{ + return msock_init(args, 0); +} + +static int bind_socket(int sd, struct sock_args *args) +{ + struct sockaddr_in serv_addr = { + .sin_family = AF_INET, + }; + struct sockaddr_in6 serv6_addr = { + .sin6_family = AF_INET6, + }; + void *addr; + socklen_t alen; + + if (!args->has_local_ip && args->type == SOCK_RAW) + return 0; + + switch (args->version) { + case AF_INET: + serv_addr.sin_port = htons(args->port); + serv_addr.sin_addr = args->local_addr.in; + addr = &serv_addr; + alen = sizeof(serv_addr); + break; + + case AF_INET6: + serv6_addr.sin6_port = htons(args->port); + serv6_addr.sin6_addr = args->local_addr.in6; + addr = &serv6_addr; + alen = sizeof(serv6_addr); + break; + + default: + log_error("Invalid address family\n"); + return -1; + } + + if (bind(sd, addr, alen) < 0) { + log_err_errno("error binding socket"); + return -1; + } + + return 0; +} + +static int lsock_init(struct sock_args *args) +{ + long flags; + int sd; + + sd = socket(args->version, args->type, args->protocol); + if (sd < 0) { + log_err_errno("Error opening socket"); + return -1; + } + + if (set_reuseaddr(sd) != 0) + goto err; + + if (set_reuseport(sd) != 0) + goto err; + + if (args->dev && bind_to_device(sd, args->dev) != 0) + goto err; + else if (args->use_setsockopt && + set_unicast_if(sd, args->ifindex, args->version)) + goto err; + + if (bind_socket(sd, args)) + goto err; + + if (args->bind_test_only) + goto out; + + if (args->type == SOCK_STREAM && listen(sd, 1) < 0) { + log_err_errno("listen failed"); + goto err; + } + + flags = fcntl(sd, F_GETFL); + if ((flags < 0) || (fcntl(sd, F_SETFL, flags|O_NONBLOCK) < 0)) { + log_err_errno("Failed to set non-blocking option"); + goto err; + } + + if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0) + log_err_errno("Failed to set 
close-on-exec flag"); + +out: + return sd; + +err: + close(sd); + return -1; +} + +static int do_server(struct sock_args *args) +{ + struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL; + unsigned char addr[sizeof(struct sockaddr_in6)] = {}; + socklen_t alen = sizeof(addr); + int lsd, csd = -1; + + fd_set rfds; + int rc; + + if (prog_timeout) + ptval = &timeout; + + if (args->has_grp) + lsd = msock_server(args); + else + lsd = lsock_init(args); + + if (lsd < 0) + return 1; + + if (args->bind_test_only) { + close(lsd); + return 0; + } + + if (args->type != SOCK_STREAM) { + rc = msg_loop(0, lsd, (void *) addr, alen, args); + close(lsd); + return rc; + } + + if (args->password && tcp_md5_remote(lsd, args)) { + close(lsd); + return 1; + } + + while (1) { + log_msg("\n"); + log_msg("waiting for client connection.\n"); + FD_ZERO(&rfds); + FD_SET(lsd, &rfds); + + rc = select(lsd+1, &rfds, NULL, NULL, ptval); + if (rc == 0) { + rc = 2; + break; + } + + if (rc < 0) { + if (errno == EINTR) + continue; + + log_err_errno("select failed"); + break; + } + + if (FD_ISSET(lsd, &rfds)) { + + csd = accept(lsd, (void *) addr, &alen); + if (csd < 0) { + log_err_errno("accept failed"); + break; + } + + rc = show_sockstat(csd, args); + if (rc) + break; + + rc = check_device(csd, args); + if (rc) + break; + } + + rc = msg_loop(0, csd, (void *) addr, alen, args); + close(csd); + + if (!interactive) + break; + } + + close(lsd); + + return rc; +} + +static int wait_for_connect(int sd) +{ + struct timeval _tv = { .tv_sec = prog_timeout }, *tv = NULL; + fd_set wfd; + int val = 0, sz = sizeof(val); + int rc; + + FD_ZERO(&wfd); + FD_SET(sd, &wfd); + + if (prog_timeout) + tv = &_tv; + + rc = select(FD_SETSIZE, NULL, &wfd, NULL, tv); + if (rc == 0) { + log_error("connect timed out\n"); + return -2; + } else if (rc < 0) { + log_err_errno("select failed"); + return -3; + } + + if (getsockopt(sd, SOL_SOCKET, SO_ERROR, &val, (socklen_t *)&sz) < 0) { + 
log_err_errno("getsockopt(SO_ERROR) failed"); + return -4; + } + + if (val != 0) { + log_error("connect failed: %d: %s\n", val, strerror(val)); + return -1; + } + + return 0; +} + +static int connectsock(void *addr, socklen_t alen, struct sock_args *args) +{ + int sd, rc = -1; + long flags; + + sd = socket(args->version, args->type, args->protocol); + if (sd < 0) { + log_err_errno("Failed to create socket"); + return -1; + } + + flags = fcntl(sd, F_GETFL); + if ((flags < 0) || (fcntl(sd, F_SETFL, flags|O_NONBLOCK) < 0)) { + log_err_errno("Failed to set non-blocking option"); + goto err; + } + + if (set_reuseport(sd) != 0) + goto err; + + if (args->dev && bind_to_device(sd, args->dev) != 0) + goto err; + else if (args->use_setsockopt && + set_unicast_if(sd, args->ifindex, args->version)) + goto err; + + if (args->has_local_ip && bind_socket(sd, args)) + goto err; + + if (args->type != SOCK_STREAM) + goto out; + + if (args->password && tcp_md5sig(sd, addr, alen, args)) + goto err; + + if (args->bind_test_only) + goto out; + + if (connect(sd, addr, alen) < 0) { + if (errno != EINPROGRESS) { + log_err_errno("Failed to connect to remote host"); + rc = -1; + goto err; + } + rc = wait_for_connect(sd); + if (rc < 0) + goto err; + } +out: + return sd; + +err: + close(sd); + return rc; +} + +static int do_client(struct sock_args *args) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + }; + void *addr; + int alen; + int rc = 0; + int sd; + + if (!args->has_remote_ip && !args->has_grp) { + fprintf(stderr, "remote IP or multicast group not given\n"); + return 1; + } + + switch (args->version) { + case AF_INET: + sin.sin_port = htons(args->port); + if (args->has_grp) + sin.sin_addr = args->grp; + else + sin.sin_addr = args->remote_addr.in; + addr = &sin; + alen = sizeof(sin); + break; + case AF_INET6: + sin6.sin6_port = htons(args->port); + sin6.sin6_addr = args->remote_addr.in6; + sin6.sin6_scope_id = 
args->scope_id; + addr = &sin6; + alen = sizeof(sin6); + break; + } + + if (args->has_grp) + sd = msock_client(args); + else + sd = connectsock(addr, alen, args); + + if (sd < 0) + return -sd; + + if (args->bind_test_only) + goto out; + + if (args->type == SOCK_STREAM) { + rc = show_sockstat(sd, args); + if (rc != 0) + goto out; + } + + rc = msg_loop(1, sd, addr, alen, args); + +out: + close(sd); + + return rc; +} + +enum addr_type { + ADDR_TYPE_LOCAL, + ADDR_TYPE_REMOTE, + ADDR_TYPE_MCAST, + ADDR_TYPE_EXPECTED_LOCAL, + ADDR_TYPE_EXPECTED_REMOTE, + ADDR_TYPE_MD5_PREFIX, +}; + +static int convert_addr(struct sock_args *args, const char *_str, + enum addr_type atype) +{ + int pfx_len_max = args->version == AF_INET6 ? 128 : 32; + int family = args->version; + char *str, *dev, *sep; + struct in6_addr *in6; + struct in_addr *in; + const char *desc; + void *addr; + int rc = 0; + + str = strdup(_str); + if (!str) + return -ENOMEM; + + switch (atype) { + case ADDR_TYPE_LOCAL: + desc = "local"; + addr = &args->local_addr; + break; + case ADDR_TYPE_REMOTE: + desc = "remote"; + addr = &args->remote_addr; + break; + case ADDR_TYPE_MCAST: + desc = "mcast grp"; + addr = &args->grp; + break; + case ADDR_TYPE_EXPECTED_LOCAL: + desc = "expected local"; + addr = &args->expected_laddr; + break; + case ADDR_TYPE_EXPECTED_REMOTE: + desc = "expected remote"; + addr = &args->expected_raddr; + break; + case ADDR_TYPE_MD5_PREFIX: + desc = "md5 prefix"; + if (family == AF_INET) { + args->md5_prefix.v4.sin_family = AF_INET; + addr = &args->md5_prefix.v4.sin_addr; + } else if (family == AF_INET6) { + args->md5_prefix.v6.sin6_family = AF_INET6; + addr = &args->md5_prefix.v6.sin6_addr; + } else + return 1; + + sep = strchr(str, '/'); + if (sep) { + *sep = '\0'; + sep++; + if (str_to_uint(sep, 1, pfx_len_max, + &args->prefix_len) != 0) { + fprintf(stderr, "Invalid port\n"); + return 1; + } + } else { + args->prefix_len = pfx_len_max; + } + break; + default: + log_error("unknown address type"); + 
exit(1); + } + + switch (family) { + case AF_INET: + in = (struct in_addr *) addr; + if (str) { + if (inet_pton(AF_INET, str, in) == 0) { + log_error("Invalid %s IP address\n", desc); + rc = -1; + goto out; + } + } else { + in->s_addr = htonl(INADDR_ANY); + } + break; + + case AF_INET6: + dev = strchr(str, '%'); + if (dev) { + *dev = '\0'; + dev++; + } + + in6 = (struct in6_addr *) addr; + if (str) { + if (inet_pton(AF_INET6, str, in6) == 0) { + log_error("Invalid %s IPv6 address\n", desc); + rc = -1; + goto out; + } + } else { + *in6 = in6addr_any; + } + if (dev) { + args->scope_id = get_ifidx(dev); + if (args->scope_id < 0) { + log_error("Invalid scope on %s IPv6 address\n", + desc); + rc = -1; + goto out; + } + } + break; + + default: + log_error("Invalid address family\n"); + } + +out: + free(str); + return rc; +} + +static char *random_msg(int len) +{ + int i, n = 0, olen = len + 1; + char *m; + + if (len <= 0) + return NULL; + + m = malloc(olen); + if (!m) + return NULL; + + while (len > 26) { + i = snprintf(m + n, olen - n, "%.26s", + "abcdefghijklmnopqrstuvwxyz"); + n += i; + len -= i; + } + i = snprintf(m + n, olen - n, "%.*s", len, + "abcdefghijklmnopqrstuvwxyz"); + return m; +} + +#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq" + +static void print_usage(char *prog) +{ + printf( + "usage: %s OPTS\n" + "Required:\n" + " -r addr remote address to connect to (client mode only)\n" + " -p port port to connect to (client mode)/listen on (server mode)\n" + " (default: %d)\n" + " -s server mode (default: client mode)\n" + " -t timeout seconds (default: none)\n" + "\n" + "Optional:\n" + " -F Restart server loop\n" + " -6 IPv6 (default is IPv4)\n" + " -P proto protocol for socket: icmp, ospf (default: none)\n" + " -D|R datagram (D) / raw (R) socket (default stream)\n" + " -l addr local address to bind to\n" + "\n" + " -d dev bind socket to given device name\n" + " -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n" + " to set device binding\n" + " 
-C use cmsg and IP_PKTINFO to specify device binding\n" + "\n" + " -L len send random message of given length\n" + " -n num number of times to send message\n" + "\n" + " -M password use MD5 sum protection\n" + " -m prefix/len prefix and length to use for MD5 key\n" + " -g grp multicast group (e.g., 239.1.1.1)\n" + " -i interactive mode (default is echo and terminate)\n" + "\n" + " -0 addr Expected local address\n" + " -1 addr Expected remote address\n" + " -2 dev Expected device name (or index) to receive packet\n" + "\n" + " -b Bind test only.\n" + " -q Be quiet. Run test without printing anything.\n" + , prog, DEFAULT_PORT); +} + +int main(int argc, char *argv[]) +{ + struct sock_args args = { + .version = AF_INET, + .type = SOCK_STREAM, + .port = DEFAULT_PORT, + }; + struct protoent *pe; + unsigned int tmp; + int forever = 0; + + /* process inputs */ + extern char *optarg; + int rc = 0; + + /* + * process input args + */ + + while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { + switch (rc) { + case 's': + server_mode = 1; + break; + case 'F': + forever = 1; + break; + case 'l': + args.has_local_ip = 1; + if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0) + return 1; + break; + case 'r': + args.has_remote_ip = 1; + if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0) + return 1; + break; + case 'p': + if (str_to_uint(optarg, 1, 65535, &tmp) != 0) { + fprintf(stderr, "Invalid port\n"); + return 1; + } + args.port = (unsigned short) tmp; + break; + case 't': + if (str_to_uint(optarg, 0, INT_MAX, + &prog_timeout) != 0) { + fprintf(stderr, "Invalid timeout\n"); + return 1; + } + break; + case 'D': + args.type = SOCK_DGRAM; + break; + case 'R': + args.type = SOCK_RAW; + args.port = 0; + break; + case 'P': + pe = getprotobyname(optarg); + if (pe) { + args.protocol = pe->p_proto; + } else { + if (str_to_uint(optarg, 0, 0xffff, &tmp) != 0) { + fprintf(stderr, "Invalid protocol\n"); + return 1; + } + args.protocol = tmp; + } + break; + case 'n': + iter = 
atoi(optarg); + break; + case 'L': + msg = random_msg(atoi(optarg)); + break; + case 'M': + args.password = optarg; + break; + case 'm': + if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0) + return 1; + break; + case 'S': + args.use_setsockopt = 1; + break; + case 'C': + args.use_cmsg = 1; + break; + case 'd': + args.dev = optarg; + args.ifindex = get_ifidx(optarg); + if (args.ifindex < 0) { + fprintf(stderr, "Invalid device name\n"); + return 1; + } + break; + case 'i': + interactive = 1; + break; + case 'g': + args.has_grp = 1; + if (convert_addr(&args, optarg, ADDR_TYPE_MCAST) < 0) + return 1; + args.type = SOCK_DGRAM; + break; + case '6': + args.version = AF_INET6; + break; + case 'b': + args.bind_test_only = 1; + break; + case '0': + args.has_expected_laddr = 1; + if (convert_addr(&args, optarg, + ADDR_TYPE_EXPECTED_LOCAL)) + return 1; + break; + case '1': + args.has_expected_raddr = 1; + if (convert_addr(&args, optarg, + ADDR_TYPE_EXPECTED_REMOTE)) + return 1; + + break; + case '2': + if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) { + args.expected_ifindex = (int)tmp; + } else { + args.expected_ifindex = get_ifidx(optarg); + if (args.expected_ifindex < 0) { + fprintf(stderr, + "Invalid expected device\n"); + return 1; + } + } + break; + case 'q': + quiet = 1; + break; + default: + print_usage(argv[0]); + return 1; + } + } + + if (args.password && + ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) { + log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n"); + return 1; + } + + if (args.prefix_len && !args.password) { + log_error("Prefix range for MD5 protection specified without a password\n"); + return 1; + } + + if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) { + fprintf(stderr, "Device binding not specified\n"); + return 1; + } + if (args.use_setsockopt || args.use_cmsg) + args.dev = NULL; + + if (iter == 0) { + fprintf(stderr, "Invalid number of messages to send\n"); + return 1; 
+ } + + if (args.type == SOCK_STREAM && !args.protocol) + args.protocol = IPPROTO_TCP; + if (args.type == SOCK_DGRAM && !args.protocol) + args.protocol = IPPROTO_UDP; + + if ((args.type == SOCK_STREAM || args.type == SOCK_DGRAM) && + args.port == 0) { + fprintf(stderr, "Invalid port number\n"); + return 1; + } + + if (!server_mode && !args.has_grp && + !args.has_remote_ip && !args.has_local_ip) { + fprintf(stderr, + "Local (server mode) or remote IP (client IP) required\n"); + return 1; + } + + if (interactive) { + prog_timeout = 0; + msg = NULL; + } + + if (server_mode) { + do { + rc = do_server(&args); + } while (forever); + + return rc; + } + return do_client(&args); +} diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index ab367e75f095..71a62e7e35b1 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -11,9 +11,9 @@ # R1 and R2 (also implemented with namespaces), with different MTUs: # # segment a_r1 segment b_r1 a_r1: 2000 -# .--------------R1--------------. a_r2: 1500 -# A B a_r3: 2000 -# '--------------R2--------------' a_r4: 1400 +# .--------------R1--------------. b_r1: 1400 +# A B a_r2: 2000 +# '--------------R2--------------' b_r2: 1500 # segment a_r2 segment b_r2 # # Check that PMTU exceptions with the correct PMTU are created. 
Then @@ -1249,8 +1249,7 @@ test_list_flush_ipv4_exception() { done run_cmd ${ns_a} ping -q -M want -i 0.1 -c 2 -s 1800 "${dst2}" - # Each exception is printed as two lines - if [ "$(${ns_a} ip route list cache | wc -l)" -ne 202 ]; then + if [ "$(${ns_a} ip -oneline route list cache | wc -l)" -ne 101 ]; then err " can't list cached exceptions" fail=1 fi @@ -1300,7 +1299,7 @@ test_list_flush_ipv6_exception() { run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst_prefix1}${i}" done run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s 1800 "${dst2}" - if [ "$(${ns_a} ip -6 route list cache | wc -l)" -ne 101 ]; then + if [ "$(${ns_a} ip -oneline -6 route list cache | wc -l)" -ne 101 ]; then err " can't list cached exceptions" fail=1 fi diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fe3230c55986..fb7a59ed759e 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto) { struct epoll_event ev; int epfd, i, test_fd; - uint16_t test_family; + int test_family; socklen_t len; epfd = epoll_create(1); @@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto) send_from_v4(proto); test_fd = receive_once(epfd, proto); + len = sizeof(test_family); if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) error(1, errno, "failed to read socket domain"); if (test_family != AF_INET) diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 53f598f06647..383bac05ac32 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -12,7 +12,11 @@ #include <arpa/inet.h> #include <error.h> #include <errno.h> +#include <inttypes.h> #include <linux/net_tstamp.h> +#include <linux/errqueue.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> #include <stdbool.h> #include <stdlib.h> 
#include <stdio.h> @@ -28,7 +32,7 @@ static int cfg_clockid = CLOCK_TAI; static bool cfg_do_ipv4; static bool cfg_do_ipv6; static uint16_t cfg_port = 8000; -static int cfg_variance_us = 2000; +static int cfg_variance_us = 4000; static uint64_t glob_tstart; @@ -43,6 +47,9 @@ static struct timed_send cfg_in[MAX_NUM_PKT]; static struct timed_send cfg_out[MAX_NUM_PKT]; static int cfg_num_pkt; +static int cfg_errq_level; +static int cfg_errq_type; + static uint64_t gettime_ns(void) { struct timespec ts; @@ -90,13 +97,15 @@ static void do_send_one(int fdt, struct timed_send *ts) } -static void do_recv_one(int fdr, struct timed_send *ts) +static bool do_recv_one(int fdr, struct timed_send *ts) { int64_t tstop, texpect; char rbuf[2]; int ret; ret = recv(fdr, rbuf, sizeof(rbuf), 0); + if (ret == -1 && errno == EAGAIN) + return true; if (ret == -1) error(1, errno, "read"); if (ret != 1) @@ -105,14 +114,16 @@ static void do_recv_one(int fdr, struct timed_send *ts) tstop = (gettime_ns() - glob_tstart) / 1000; texpect = ts->delay_us >= 0 ? ts->delay_us : 0; - fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n", - rbuf[0], tstop, texpect); + fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n", + rbuf[0], (long long)tstop, (long long)texpect); if (rbuf[0] != ts->data) error(1, 0, "payload mismatch. 
expected %c", ts->data); if (labs(tstop - texpect) > cfg_variance_us) error(1, 0, "exceeds variance (%d us)", cfg_variance_us); + + return false; } static void do_recv_verify_empty(int fdr) @@ -125,12 +136,70 @@ static void do_recv_verify_empty(int fdr) error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno); } +static void do_recv_errqueue_timeout(int fdt) +{ + char control[CMSG_SPACE(sizeof(struct sock_extended_err)) + + CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0}; + char data[sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + 1]; + struct sock_extended_err *err; + struct msghdr msg = {0}; + struct iovec iov = {0}; + struct cmsghdr *cm; + int64_t tstamp = 0; + int ret; + + iov.iov_base = data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + while (1) { + ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + break; + if (ret == -1) + error(1, errno, "errqueue"); + if (msg.msg_flags != MSG_ERRQUEUE) + error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags); + + cm = CMSG_FIRSTHDR(&msg); + if (cm->cmsg_level != cfg_errq_level || + cm->cmsg_type != cfg_errq_type) + error(1, 0, "errqueue: type 0x%x.0x%x\n", + cm->cmsg_level, cm->cmsg_type); + + err = (struct sock_extended_err *)CMSG_DATA(cm); + if (err->ee_origin != SO_EE_ORIGIN_TXTIME) + error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin); + if (err->ee_code != ECANCELED) + error(1, 0, "errqueue: code 0x%x\n", err->ee_code); + + tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info; + tstamp -= (int64_t) glob_tstart; + tstamp /= 1000 * 1000; + fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n", + data[ret - 1], tstamp); + + msg.msg_flags = 0; + msg.msg_controllen = sizeof(control); + } + + error(1, 0, "recv: timeout"); +} + static void setsockopt_txtime(int fd) { struct sock_txtime so_txtime_val = { .clockid = cfg_clockid }; struct sock_txtime so_txtime_val_read = { 0 }; 
socklen_t vallen = sizeof(so_txtime_val); + so_txtime_val.flags = SOF_TXTIME_REPORT_ERRORS; + if (setsockopt(fd, SOL_SOCKET, SO_TXTIME, &so_txtime_val, sizeof(so_txtime_val))) error(1, errno, "setsockopt txtime"); @@ -194,7 +263,8 @@ static void do_test(struct sockaddr *addr, socklen_t alen) for (i = 0; i < cfg_num_pkt; i++) do_send_one(fdt, &cfg_in[i]); for (i = 0; i < cfg_num_pkt; i++) - do_recv_one(fdr, &cfg_out[i]); + if (do_recv_one(fdr, &cfg_out[i])) + do_recv_errqueue_timeout(fdt); do_recv_verify_empty(fdr); @@ -280,6 +350,10 @@ int main(int argc, char **argv) addr6.sin6_family = AF_INET6; addr6.sin6_port = htons(cfg_port); addr6.sin6_addr = in6addr_loopback; + + cfg_errq_level = SOL_IPV6; + cfg_errq_type = IPV6_RECVERR; + do_test((void *)&addr6, sizeof(addr6)); } @@ -289,6 +363,10 @@ int main(int argc, char **argv) addr4.sin_family = AF_INET; addr4.sin_port = htons(cfg_port); addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + + cfg_errq_level = SOL_IP; + cfg_errq_type = IP_RECVERR; + do_test((void *)&addr4, sizeof(addr4)); } diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh index 5aa519328a5b..3f7800eaecb1 100755 --- a/tools/testing/selftests/net/so_txtime.sh +++ b/tools/testing/selftests/net/so_txtime.sh @@ -5,7 +5,12 @@ # Run in network namespace if [[ $# -eq 0 ]]; then - ./in_netns.sh $0 __subprocess + if ! ./in_netns.sh $0 __subprocess; then + # test is time sensitive, can be flaky + echo "test failed: retry once" + ./in_netns.sh $0 __subprocess + fi + exit $? fi @@ -18,7 +23,7 @@ tc qdisc add dev lo root fq ./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20 ./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20 -if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then +if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then ! ./so_txtime -4 -6 -c tai a,-1 a,-1 ! 
./so_txtime -4 -6 -c tai a,0 a,0 ./so_txtime -4 -6 -c tai a,10 a,10 diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 31ced79f4f25..35505b31e5cc 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -71,7 +71,7 @@ #define MSG_ZEROCOPY 0x4000000 #endif -#define FILE_SZ (1UL << 35) +#define FILE_SZ (1ULL << 35) static int cfg_family = AF_INET6; static socklen_t cfg_alen = sizeof(struct sockaddr_in6); static int cfg_port = 8787; @@ -82,7 +82,9 @@ static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for recei static int xflg; /* hash received data (simple xor) (-h option) */ static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */ -static int chunk_size = 512*1024; +static size_t chunk_size = 512*1024; + +static size_t map_align; unsigned long htotal; @@ -118,6 +120,9 @@ void hash_zone(void *zone, unsigned int length) htotal = temp; } +#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) +#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) + void *child_thread(void *arg) { unsigned long total_mmap = 0, total = 0; @@ -126,6 +131,7 @@ void *child_thread(void *arg) int flags = MAP_SHARED; struct timeval t0, t1; char *buffer = NULL; + void *raddr = NULL; void *addr = NULL; double throughput; struct rusage ru; @@ -142,9 +148,13 @@ void *child_thread(void *arg) goto error; } if (zflg) { - addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0); - if (addr == (void *)-1) + raddr = mmap(NULL, chunk_size + map_align, PROT_READ, flags, fd, 0); + if (raddr == (void *)-1) { + perror("mmap"); zflg = 0; + } else { + addr = ALIGN_PTR_UP(raddr, map_align); + } } while (1) { struct pollfd pfd = { .fd = fd, .events = POLLIN, }; @@ -155,7 +165,7 @@ void *child_thread(void *arg) socklen_t zc_len = sizeof(zc); int res; - zc.address = (__u64)addr; + zc.address = 
(__u64)((unsigned long)addr); zc.length = chunk_size; zc.recv_skip_hint = 0; res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, @@ -222,7 +232,7 @@ error: free(buffer); close(fd); if (zflg) - munmap(addr, chunk_size); + munmap(raddr, chunk_size + map_align); pthread_exit(0); } @@ -270,6 +280,11 @@ static void setup_sockaddr(int domain, const char *str_addr, static void do_accept(int fdlisten) { + pthread_attr_t attr; + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT, &chunk_size, sizeof(chunk_size)) == -1) { perror("setsockopt SO_RCVLOWAT"); @@ -288,7 +303,7 @@ static void do_accept(int fdlisten) perror("accept"); continue; } - res = pthread_create(&th, NULL, child_thread, + res = pthread_create(&th, &attr, child_thread, (void *)(unsigned long)fd); if (res) { errno = res; @@ -298,18 +313,42 @@ static void do_accept(int fdlisten) } } +/* Each thread should reserve a big enough vma to avoid + * spinlock collisions in ptl locks. + * This size is 2MB on x86_64, and is exported in /proc/meminfo. 
+ */ +static unsigned long default_huge_page_size(void) +{ + FILE *f = fopen("/proc/meminfo", "r"); + unsigned long hps = 0; + size_t linelen = 0; + char *line = NULL; + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + free(line); + fclose(f); + return hps; +} + int main(int argc, char *argv[]) { struct sockaddr_storage listenaddr, addr; unsigned int max_pacing_rate = 0; - unsigned long total = 0; + size_t total = 0; char *host = NULL; int fd, c, on = 1; char *buffer; int sflg = 0; int mss = 0; - while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) { + while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) { switch (c) { case '4': cfg_family = PF_INET; @@ -349,10 +388,24 @@ int main(int argc, char *argv[]) case 'P': max_pacing_rate = atoi(optarg) ; break; + case 'C': + chunk_size = atol(optarg); + break; + case 'a': + map_align = atol(optarg); + break; default: exit(1); } } + if (!map_align) { + map_align = default_huge_page_size(); + /* if really /proc/meminfo is not helping, + * we use the default x86_64 hugepagesize. 
+ */ + if (!map_align) + map_align = 2*1024*1024; + } if (sflg) { int fdlisten = socket(cfg_family, SOCK_STREAM, 0); @@ -417,7 +470,7 @@ int main(int argc, char *argv[]) zflg = 0; } while (total < FILE_SZ) { - long wr = FILE_SZ - total; + ssize_t wr = FILE_SZ - total; if (wr > chunk_size) wr = chunk_size; diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 4c285b6e1db8..0ea44d975b6c 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -25,10 +25,6 @@ #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - FIXTURE(tls_basic) { int fd, cfd; @@ -268,6 +264,38 @@ TEST_F(tls, sendmsg_single) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +#define MAX_FRAGS 64 +#define SEND_LEN 13 +TEST_F(tls, sendmsg_fragmented) +{ + char const *test_str = "test_sendmsg"; + char buf[SEND_LEN * MAX_FRAGS]; + struct iovec vec[MAX_FRAGS]; + struct msghdr msg; + int i, frags; + + for (frags = 1; frags <= MAX_FRAGS; frags++) { + for (i = 0; i < frags; i++) { + vec[i].iov_base = (char *)test_str; + vec[i].iov_len = SEND_LEN; + } + + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = vec; + msg.msg_iovlen = frags; + + EXPECT_EQ(sendmsg(self->fd, &msg, 0), SEND_LEN * frags); + EXPECT_EQ(recv(self->cfd, buf, SEND_LEN * frags, MSG_WAITALL), + SEND_LEN * frags); + + for (i = 0; i < frags; i++) + EXPECT_EQ(memcmp(buf + SEND_LEN * i, + test_str, SEND_LEN), 0); + } +} +#undef MAX_FRAGS +#undef SEND_LEN + TEST_F(tls, sendmsg_large) { void *mem = malloc(16384); @@ -898,6 +926,114 @@ TEST_F(tls, nonblocking) } } +static void +test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self, + bool sendpg, unsigned int n_readers, unsigned int n_writers) +{ + const unsigned int n_children = n_readers + n_writers; + const size_t data = 6 * 1000 * 1000; + const size_t file_sz = data / 100; + size_t read_bias, write_bias; + int i, fd, child_id; + char buf[file_sz]; + 
pid_t pid; + + /* Only allow multiples for simplicity */ + ASSERT_EQ(!(n_readers % n_writers) || !(n_writers % n_readers), true); + read_bias = n_writers / n_readers ?: 1; + write_bias = n_readers / n_writers ?: 1; + + /* prep a file to send */ + fd = open("/tmp/", O_TMPFILE | O_RDWR, 0600); + ASSERT_GE(fd, 0); + + memset(buf, 0xac, file_sz); + ASSERT_EQ(write(fd, buf, file_sz), file_sz); + + /* spawn children */ + for (child_id = 0; child_id < n_children; child_id++) { + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) + break; + } + + /* parent waits for all children */ + if (pid) { + for (i = 0; i < n_children; i++) { + int status; + + wait(&status); + EXPECT_EQ(status, 0); + } + + return; + } + + /* Split threads for reading and writing */ + if (child_id < n_readers) { + size_t left = data * read_bias; + char rb[8001]; + + while (left) { + int res; + + res = recv(self->cfd, rb, + left > sizeof(rb) ? sizeof(rb) : left, 0); + + EXPECT_GE(res, 0); + left -= res; + } + } else { + size_t left = data * write_bias; + + while (left) { + int res; + + ASSERT_EQ(lseek(fd, 0, SEEK_SET), 0); + if (sendpg) + res = sendfile(self->fd, fd, NULL, + left > file_sz ? file_sz : left); + else + res = send(self->fd, buf, + left > file_sz ? 
file_sz : left, 0); + + EXPECT_GE(res, 0); + left -= res; + } + } +} + +TEST_F(tls, mutliproc_even) +{ + test_mutliproc(_metadata, self, false, 6, 6); +} + +TEST_F(tls, mutliproc_readers) +{ + test_mutliproc(_metadata, self, false, 4, 12); +} + +TEST_F(tls, mutliproc_writers) +{ + test_mutliproc(_metadata, self, false, 10, 2); +} + +TEST_F(tls, mutliproc_sendpage_even) +{ + test_mutliproc(_metadata, self, true, 6, 6); +} + +TEST_F(tls, mutliproc_sendpage_readers) +{ + test_mutliproc(_metadata, self, true, 4, 12); +} + +TEST_F(tls, mutliproc_sendpage_writers) +{ + test_mutliproc(_metadata, self, true, 10, 2); +} + TEST_F(tls, control_msg) { if (self->notls) @@ -1037,11 +1173,11 @@ TEST(non_established) { /* TLS ULP not supported */ if (errno == ENOENT) return; - EXPECT_EQ(errno, ENOTSUPP); + EXPECT_EQ(errno, ENOTCONN); ret = setsockopt(sfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); EXPECT_EQ(ret, -1); - EXPECT_EQ(errno, ENOTSUPP); + EXPECT_EQ(errno, ENOTCONN); ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh new file mode 100755 index 000000000000..de9ca97abc30 --- /dev/null +++ b/tools/testing/selftests/net/traceroute.sh @@ -0,0 +1,322 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run traceroute/traceroute6 tests +# + +VERBOSE=0 +PAUSE_ON_FAIL=no + +################################################################################ +# +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd() +{ + local ns + local cmd + local out + local rc + + ns="$1" + shift + cmd="$*" + + if [ "$VERBOSE" = "1" ]; then + 
printf " COMMAND: $cmd\n" + fi + + out=$(eval ip netns exec ${ns} ${cmd} 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# create namespaces and interconnects + +create_ns() +{ + local ns=$1 + local addr=$2 + local addr6=$3 + + [ -z "${addr}" ] && addr="-" + [ -z "${addr6}" ] && addr6="-" + + ip netns add ${ns} + + ip netns exec ${ns} ip link set lo up + if [ "${addr}" != "-" ]; then + ip netns exec ${ns} ip addr add dev lo ${addr} + fi + if [ "${addr6}" != "-" ]; then + ip netns exec ${ns} ip -6 addr add dev lo ${addr6} + fi + + ip netns exec ${ns} ip ro add unreachable default metric 8192 + ip netns exec ${ns} ip -6 ro add unreachable default metric 8192 + + ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +# create veth pair to connect namespaces and apply addresses. 
+connect_ns() +{ + local ns1=$1 + local ns1_dev=$2 + local ns1_addr=$3 + local ns1_addr6=$4 + local ns2=$5 + local ns2_dev=$6 + local ns2_addr=$7 + local ns2_addr6=$8 + + ip netns exec ${ns1} ip li add ${ns1_dev} type veth peer name tmp + ip netns exec ${ns1} ip li set ${ns1_dev} up + ip netns exec ${ns1} ip li set tmp netns ${ns2} name ${ns2_dev} + ip netns exec ${ns2} ip li set ${ns2_dev} up + + if [ "${ns1_addr}" != "-" ]; then + ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr} + fi + + if [ "${ns2_addr}" != "-" ]; then + ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr} + fi + + if [ "${ns1_addr6}" != "-" ]; then + ip netns exec ${ns1} ip addr add dev ${ns1_dev} ${ns1_addr6} + fi + + if [ "${ns2_addr6}" != "-" ]; then + ip netns exec ${ns2} ip addr add dev ${ns2_dev} ${ns2_addr6} + fi +} + +################################################################################ +# traceroute6 test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# where H1's default route goes through R1 and R1's default route goes +# through R2 over N2, traceroute6 from H1 to H2 reports R2's address +# on N2 and not N1. 
+# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6() +{ + local ns + + for ns in host-1 host-2 router-1 router-2 + do + ip netns del ${ns} 2>/dev/null + done +} + +setup_traceroute6() +{ + brdev=br0 + + # start clean + cleanup_traceroute6 + + set -e + create_ns host-1 + create_ns host-2 + create_ns router-1 + create_ns router-2 + + # Setup N3 + connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64 + ip netns exec host-2 ip route add default via 2000:103::2 + + # Setup N2 + connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64 + ip netns exec router-1 ip route add default via 2000:102::2 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip netns exec router-1 ip link add name ${brdev} type bridge + ip netns exec router-1 ip link set ${brdev} up + ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev} + + connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - - + ip netns exec router-1 ip link set dev eth0 master ${brdev} + ip netns exec host-1 ip route add default via 2000:101::1 + + connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - - + ip netns exec router-1 ip link set dev eth1 master ${brdev} + + # Prime the network + ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1 + + set +e +} + +run_traceroute6() +{ + if [ ! -x "$(command -v traceroute6)" ]; then + echo "SKIP: Could not run IPV6 test without traceroute6" + return + fi + + setup_traceroute6 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2" + log_test $? 
0 "IPV6 traceroute" + + cleanup_traceroute6 +} + +################################################################################ +# traceroute test +# +# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# +# 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and +# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary +# address on N1. +# + +cleanup_traceroute() +{ + local ns + + for ns in host-1 host-2 router + do + ip netns del ${ns} 2>/dev/null + done +} + +setup_traceroute() +{ + # start clean + cleanup_traceroute + + set -e + create_ns host-1 + create_ns host-2 + create_ns router + + connect_ns host-1 eth0 1.0.1.3/24 - \ + router eth1 1.0.3.1/24 - + ip netns exec host-1 ip route add default via 1.0.1.1 + + ip netns exec router ip addr add 1.0.1.1/24 dev eth1 + ip netns exec router sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns host-2 eth0 1.0.2.4/24 - \ + router eth2 1.0.2.1/24 - + ip netns exec host-2 ip route add default via 1.0.2.1 + + # Prime the network + ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1 + + set +e +} + +run_traceroute() +{ + if [ ! -x "$(command -v traceroute)" ]; then + echo "SKIP: Could not run IPV4 test without traceroute" + return + fi + + setup_traceroute + + # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. + run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" + log_test $? 
0 "IPV4 traceroute" + + cleanup_traceroute +} + +################################################################################ +# Run tests + +run_tests() +{ + run_traceroute6 + run_traceroute +} + +################################################################################ +# main + +declare -i nfail=0 +declare -i nsuccess=0 + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + *) exit 1;; + esac +done + +run_tests + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index b8265ee9923f..c66da6ffd6d8 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -89,12 +89,9 @@ struct testcase testcases_v4[] = { .tfail = true, }, { - /* send a single MSS: will fail with GSO, because the segment - * logic in udp4_ufo_fragment demands a gso skb to be > MTU - */ + /* send a single MSS: will fall back to no GSO */ .tlen = CONST_MSS_V4, .gso_len = CONST_MSS_V4, - .tfail = true, .r_num_mss = 1, }, { @@ -139,10 +136,9 @@ struct testcase testcases_v4[] = { .tfail = true, }, { - /* send a single 1B MSS: will fail, see single MSS above */ + /* send a single 1B MSS: will fall back to no GSO */ .tlen = 1, .gso_len = 1, - .tfail = true, .r_num_mss = 1, }, { @@ -196,12 +192,9 @@ struct testcase testcases_v6[] = { .tfail = true, }, { - /* send a single MSS: will fail with GSO, because the segment - * logic in udp4_ufo_fragment demands a gso skb to be > MTU - */ + /* send a single MSS: will fall back to no GSO */ .tlen = CONST_MSS_V6, .gso_len = CONST_MSS_V6, - .tfail = true, .r_num_mss = 1, }, { @@ -246,10 +239,9 @@ struct testcase testcases_v6[] = { .tfail = true, }, { - /* send a single 1B MSS: will fail, see single MSS above */ + /* send a single 1B MSS: will fall back to no GSO */ .tlen = 1, .gso_len = 1, - .tfail = true, .r_num_mss = 1, }, { @@ -448,7 +440,8 @@ static 
bool __send_one(int fd, struct msghdr *msg, int flags) if (ret == -1) error(1, errno, "sendmsg"); if (ret != msg->msg_iov->iov_len) - error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len); + error(1, 0, "sendto: %d != %llu", ret, + (unsigned long long)msg->msg_iov->iov_len); if (msg->msg_flags) error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags); diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index ada99496634a..17512a43885e 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -405,7 +405,8 @@ static int send_udp_segment(int fd, char *data) if (ret == -1) error(1, errno, "sendmsg"); if (ret != iov.iov_len) - error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len); + error(1, 0, "sendmsg: %u != %llu\n", ret, + (unsigned long long)iov.iov_len); return 1; } diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh index 5445943bf07f..7a1bf94c5bd3 100755 --- a/tools/testing/selftests/net/xfrm_policy.sh +++ b/tools/testing/selftests/net/xfrm_policy.sh @@ -106,6 +106,13 @@ do_overlap() # # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23. ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block + + # similar to above: add policies (with partially random address), with shrinking prefixes. 
+ for p in 29 28 27;do + for k in $(seq 1 32); do + ip -net $ns xfrm policy add src 10.253.1.$((RANDOM%255))/$p dst 10.254.1.$((RANDOM%255))/$p dir fwd priority $((200+k)) action block 2>/dev/null + done + done } do_esp_policy_get_check() { diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 4144984ebee5..08194aa44006 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -2,6 +2,7 @@ # Makefile for netfilter selftests TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ - conntrack_icmp_related.sh nft_flowtable.sh + conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ + nft_concat_range.sh include ../lib.mk diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh new file mode 100755 index 000000000000..c3b8f90c497e --- /dev/null +++ b/tools/testing/selftests/netfilter/ipvs.sh @@ -0,0 +1,228 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# End-to-end ipvs test suite +# Topology: +#--------------------------------------------------------------+ +# | | +# ns0 | ns1 | +# ----------- | ----------- ----------- | +# | veth01 | --------- | veth10 | | veth12 | | +# ----------- peer ----------- ----------- | +# | | | | +# ----------- | | | +# | br0 | |----------------- peer |--------------| +# ----------- | | | +# | | | | +# ---------- peer ---------- ----------- | +# | veth02 | --------- | veth20 | | veth21 | | +# ---------- | ---------- ----------- | +# | ns2 | +# | | +#--------------------------------------------------------------+ +# +# We assume that all network driver are loaded +# + +# Kselftest framework requirement - SKIP code is 4. 
+ksft_skip=4 +ret=0 +GREEN='\033[0;92m' +RED='\033[0;31m' +NC='\033[0m' # No Color + +readonly port=8080 + +readonly vip_v4=207.175.44.110 +readonly cip_v4=10.0.0.2 +readonly gip_v4=10.0.0.1 +readonly dip_v4=172.16.0.1 +readonly rip_v4=172.16.0.2 +readonly sip_v4=10.0.0.3 + +readonly infile="$(mktemp)" +readonly outfile="$(mktemp)" +readonly datalen=32 + +sysipvsnet="/proc/sys/net/ipv4/vs/" +if [ ! -d $sysipvsnet ]; then + modprobe -q ip_vs + if [ $? -ne 0 ]; then + echo "skip: could not run test without ipvs module" + exit $ksft_skip + fi +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ipvsadm -v > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "SKIP: Could not run test without ipvsadm" + exit $ksft_skip +fi + +setup() { + ip netns add ns0 + ip netns add ns1 + ip netns add ns2 + + ip link add veth01 netns ns0 type veth peer name veth10 netns ns1 + ip link add veth02 netns ns0 type veth peer name veth20 netns ns2 + ip link add veth12 netns ns1 type veth peer name veth21 netns ns2 + + ip netns exec ns0 ip link set veth01 up + ip netns exec ns0 ip link set veth02 up + ip netns exec ns0 ip link add br0 type bridge + ip netns exec ns0 ip link set veth01 master br0 + ip netns exec ns0 ip link set veth02 master br0 + ip netns exec ns0 ip link set br0 up + ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0 + + ip netns exec ns1 ip link set lo up + ip netns exec ns1 ip link set veth10 up + ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10 + ip netns exec ns1 ip link set veth12 up + ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12 + + ip netns exec ns2 ip link set lo up + ip netns exec ns2 ip link set veth21 up + ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21 + ip netns exec ns2 ip link set veth20 up + ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20 + + sleep 1 + + dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none +} + +cleanup() { + for i in 0 1 2 + 
do + ip netns del ns$i > /dev/null 2>&1 + done + + if [ -f "${outfile}" ]; then + rm "${outfile}" + fi + if [ -f "${infile}" ]; then + rm "${infile}" + fi +} + +server_listen() { + ip netns exec ns2 nc -l -p 8080 > "${outfile}" & + server_pid=$! + sleep 0.2 +} + +client_connect() { + ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}" +} + +verify_data() { + wait "${server_pid}" + cmp "$infile" "$outfile" 2>/dev/null +} + +test_service() { + server_listen + client_connect + verify_data +} + + +test_dr() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + # avoid incorrect arp response + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 + # avoid reverse route lookup + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 + ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + + test_service +} + +test_nat() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec ns2 ip link del veth20 + ip netns exec ns2 ip route add default via ${dip_v4} dev veth21 + + test_service +} + +test_tun() { + ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0 + + ip netns exec ns1 modprobe ipip + ip netns exec ns1 ip link set tunl0 up + ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0 + ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0 + ip netns exec ns1 sysctl -qw 
net.ipv4.conf.default.send_redirects=0 + ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr + ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port} + ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1 + + ip netns exec ns2 modprobe ipip + ip netns exec ns2 ip link set tunl0 up + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 + ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0 + ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1 + + test_service +} + +run_tests() { + local errors= + + echo "Testing DR mode..." + cleanup + setup + test_dr + errors=$(( $errors + $? )) + + echo "Testing NAT mode..." + cleanup + setup + test_nat + errors=$(( $errors + $? )) + + echo "Testing Tunnel mode..." + cleanup + setup + test_tun + errors=$(( $errors + $? )) + + return $errors +} + +trap cleanup EXIT + +run_tests + +if [ $? 
-ne 0 ]; then + echo -e "$(basename $0): ${RED}FAIL${NC}" + exit 1 +fi +echo -e "$(basename $0): ${GREEN}PASS${NC}" +exit 0 diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh new file mode 100755 index 000000000000..aca21dde102a --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -0,0 +1,1481 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# nft_concat_range.sh - Tests for sets with concatenation of ranged fields +# +# Copyright (c) 2019 Red Hat GmbH +# +# Author: Stefano Brivio <sbrivio@redhat.com> +# +# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031 +# ^ Configuration and templates sourced with eval, counters reused in subshells + +KSELFTEST_SKIP=4 + +# Available test groups: +# - correctness: check that packets match given entries, and only those +# - concurrency: attempt races between insertion, deletion and lookup +# - timeout: check that packets match entries until they expire +# - performance: estimate matching rate, compare with rbtree and hash baselines +TESTS="correctness concurrency timeout" +[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance" + +# Set types, defined by TYPE_ variables below +TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto + net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port + net_port_mac_proto_net" + +# List of possible paths to pktgen script from kernel tree for performance tests +PKTGEN_SCRIPT_PATHS=" + ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + pktgen/pktgen_bench_xmit_mode_netif_receive.sh" + +# Definition of set types: +# display display text for test report +# type_spec nftables set type specifier +# chain_spec nftables type specifier for rules mapping to set +# dst call sequence of format_*() functions for destination fields +# src call sequence of format_*() functions for source fields +# start initial integer used to generate addresses and 
ports +# count count of entries to generate and match +# src_delta number summed to destination generator for source fields +# tools list of tools for correctness and timeout tests, any can be used +# proto L4 protocol of test packets +# +# race_repeat race attempts per thread, 0 disables concurrency test for type +# flood_tools list of tools for concurrency tests, any can be used +# flood_proto L4 protocol of test packets for concurrency tests +# flood_spec nftables type specifier for concurrency tests +# +# perf_duration duration of single pktgen injection test +# perf_spec nftables type specifier for performance tests +# perf_dst format_*() functions for destination fields in performance test +# perf_src format_*() functions for source fields in performance test +# perf_entries number of set entries for performance test +# perf_proto L3 protocol of test packets +TYPE_net_port=" +display net,port +type_spec ipv4_addr . inet_service +chain_spec ip daddr . udp dport +dst addr4 port +src +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto udp +flood_spec ip daddr . udp dport + +perf_duration 5 +perf_spec ip daddr . udp dport +perf_dst addr4 port +perf_src +perf_entries 1000 +perf_proto ipv4 +" + +TYPE_port_net=" +display port,net +type_spec inet_service . ipv4_addr +chain_spec udp dport . ip daddr +dst port addr4 +src +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto udp +flood_spec udp dport . ip daddr + +perf_duration 5 +perf_spec udp dport . ip daddr +perf_dst port addr4 +perf_src +perf_entries 100 +perf_proto ipv4 +" + +TYPE_net6_port=" +display net6,port +type_spec ipv6_addr . inet_service +chain_spec ip6 daddr . udp dport +dst addr6 port +src +start 10 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp6 + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp6 +flood_spec ip6 daddr . 
udp dport + +perf_duration 5 +perf_spec ip6 daddr . udp dport +perf_dst addr6 port +perf_src +perf_entries 1000 +perf_proto ipv6 +" + +TYPE_port_proto=" +display port,proto +type_spec inet_service . inet_proto +chain_spec udp dport . meta l4proto +dst port proto +src +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 5 +perf_spec udp dport . meta l4proto +perf_dst port proto +perf_src +perf_entries 30000 +perf_proto ipv4 +" + +TYPE_net6_port_mac=" +display net6,port,mac +type_spec ipv6_addr . inet_service . ether_addr +chain_spec ip6 daddr . udp dport . ether saddr +dst addr6 port +src mac +start 10 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp6 + +race_repeat 0 + +perf_duration 5 +perf_spec ip6 daddr . udp dport . ether daddr +perf_dst addr6 port mac +perf_src +perf_entries 10 +perf_proto ipv6 +" + +TYPE_net6_port_mac_proto=" +display net6,port,mac,proto +type_spec ipv6_addr . inet_service . ether_addr . inet_proto +chain_spec ip6 daddr . udp dport . ether saddr . meta l4proto +dst addr6 port +src mac proto +start 10 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp6 + +race_repeat 0 + +perf_duration 5 +perf_spec ip6 daddr . udp dport . ether daddr . meta l4proto +perf_dst addr6 port mac proto +perf_src +perf_entries 1000 +perf_proto ipv6 +" + +TYPE_net_port_net=" +display net,port,net +type_spec ipv4_addr . inet_service . ipv4_addr +chain_spec ip daddr . udp dport . ip saddr +dst addr4 port +src addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp +flood_spec ip daddr . udp dport . ip saddr + +perf_duration 0 +" + +TYPE_net6_port_net6_port=" +display net6,port,net6,port +type_spec ipv6_addr . inet_service . ipv6_addr . inet_service +chain_spec ip6 daddr . udp dport . ip6 saddr . 
udp sport +dst addr6 port +src addr6 port +start 10 +count 5 +src_delta 2000 +tools sendip nc +proto udp6 + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp6 +flood_spec ip6 daddr . tcp dport . ip6 saddr . tcp sport + +perf_duration 0 +" + +TYPE_net_port_mac_proto_net=" +display net,port,mac,proto,net +type_spec ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr +chain_spec ip daddr . udp dport . ether saddr . meta l4proto . ip saddr +dst addr4 port +src mac proto addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_mac=" +display net,mac +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 5 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 5 +perf_spec ip daddr . ether daddr +perf_dst addr4 mac +perf_src +perf_entries 1000 +perf_proto ipv4 +" + +TYPE_net_mac_icmp=" +display net,mac - ICMP +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 5 +src_delta 2000 +tools ping +proto icmp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net6_mac_icmp=" +display net6,mac - ICMPv6 +type_spec ipv6_addr . ether_addr +chain_spec ip6 daddr . ether saddr +dst addr6 +src mac +start 10 +count 50 +src_delta 2000 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + +TYPE_net_port_proto_net=" +display net,port,proto,net +type_spec ipv4_addr . inet_service . inet_proto . ipv4_addr +chain_spec ip daddr . udp dport . meta l4proto . ip saddr +dst addr4 port proto +src addr4 +start 1 +count 5 +src_delta 2000 +tools sendip nc +proto udp + +race_repeat 3 +flood_tools iperf3 iperf netperf +flood_proto tcp +flood_spec ip daddr . tcp dport . meta l4proto . 
ip saddr + +perf_duration 0 +" + +# Set template for all tests, types and rules are filled in depending on test +set_template=' +flush ruleset + +table inet filter { + counter test { + packets 0 bytes 0 + } + + set test { + type ${type_spec} + flags interval,timeout + } + + chain input { + type filter hook prerouting priority 0; policy accept; + ${chain_spec} @test counter name \"test\" + } +} + +table netdev perf { + counter test { + packets 0 bytes 0 + } + + counter match { + packets 0 bytes 0 + } + + set test { + type ${type_spec} + flags interval + } + + set norange { + type ${type_spec} + } + + set noconcat { + type ${type_spec%% *} + flags interval + } + + chain test { + type filter hook ingress device veth_a priority 0; + } +} +' + +err_buf= +info_buf= + +# Append string to error buffer +err() { + err_buf="${err_buf}${1} +" +} + +# Append string to information buffer +info() { + info_buf="${info_buf}${1} +" +} + +# Flush error buffer to stdout +err_flush() { + printf "%s" "${err_buf}" + err_buf= +} + +# Flush information buffer to stdout +info_flush() { + printf "%s" "${info_buf}" + info_buf= +} + +# Setup veth pair: this namespace receives traffic, B generates it +setup_veth() { + ip netns add B + ip link add veth_a type veth peer name veth_b || return 1 + + ip link set veth_a up + ip link set veth_b netns B + + ip -n B link set veth_b up + + ip addr add dev veth_a 10.0.0.1 + ip route add default dev veth_a + + ip -6 addr add fe80::1/64 dev veth_a nodad + ip -6 addr add 2001:db8::1/64 dev veth_a nodad + ip -6 route add default dev veth_a + + ip -n B route add default dev veth_b + + ip -6 -n B addr add fe80::2/64 dev veth_b nodad + ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad + ip -6 -n B route add default dev veth_b + + B() { + ip netns exec B "$@" >/dev/null 2>&1 + } + + sleep 2 +} + +# Fill in set template and initialise set +setup_set() { + eval "echo \"${set_template}\"" | nft -f - +} + +# Check that at least one of the needed tools is available 
+check_tools() { + __tools= + for tool in ${tools}; do + if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \ + ! nc -u -w0 1.1.1.1 1 2>/dev/null; then + # Some GNU netcat builds might not support IPv6 + __tools="${__tools} netcat-openbsd" + continue + fi + __tools="${__tools} ${tool}" + + command -v "${tool}" >/dev/null && return 0 + done + err "need one of:${__tools}, skipping" && return 1 +} + +# Set up function to send ICMP packets +setup_send_icmp() { + send_icmp() { + B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1 + } +} + +# Set up function to send ICMPv6 packets +setup_send_icmp6() { + if command -v ping6 >/dev/null; then + send_icmp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ping6 -q -c1 -W1 "${dst_addr6}" + } + else + send_icmp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ping -q -6 -c1 -W1 "${dst_addr6}" + } + fi +} + +# Set up function to send single UDP packets on IPv4 +setup_send_udp() { + if command -v sendip >/dev/null; then + send_udp() { + [ -n "${src_port}" ] && src_port="-us ${src_port}" + [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}" + [ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}" + + # shellcheck disable=SC2086 # sendip needs split options + B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \ + ${dst_port} "${dst_addr4}" + + src_port= + dst_port= + src_addr4= + } + elif command -v nc >/dev/null; then + if nc -u -w0 1.1.1.1 1 2>/dev/null; then + # OpenBSD netcat + nc_opt="-w0" + else + # GNU netcat + nc_opt="-q0" + fi + + send_udp() { + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}" dev veth_b + __src_addr4="-s ${src_addr4}" + fi + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + [ -n "${src_port}" ] && src_port="-p ${src_port}" + + echo "" | B nc -u "${nc_opt}" "${__src_addr4}" \ + "${src_port}" "${dst_addr4}" "${dst_port}" + + src_addr4= + src_port= + } + elif [ -z "$(bash -c 'type -p')" ]; then + send_udp() { + ip addr add "${dst_addr4}" dev veth_a 
2>/dev/null + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + B ip route add default dev veth_b + fi + + B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}" + + if [ -n "${src_addr4}" ]; then + B ip addr del "${src_addr4}/16" dev veth_b + fi + src_addr4= + } + else + return 1 + fi +} + +# Set up function to send single UDP packets on IPv6 +setup_send_udp6() { + if command -v sendip >/dev/null; then + send_udp6() { + [ -n "${src_port}" ] && src_port="-us ${src_port}" + [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}" + if [ -n "${src_addr6}" ]; then + src_addr6="-6s ${src_addr6}" + else + src_addr6="-6s 2001:db8::2" + fi + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \ + ${dst_port} "${dst_addr6}" + + src_port= + dst_port= + src_addr6= + } + elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then + # GNU netcat might not work with IPv6, try next tool + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + else + src_addr6="2001:db8::2" + fi + [ -n "${src_port}" ] && src_port="-p ${src_port}" + + # shellcheck disable=SC2086 # this needs split options + echo "" | B nc -u w0 "-s${src_addr6}" ${src_port} \ + ${dst_addr6} ${dst_port} + + src_addr6= + src_port= + } + elif [ -z "$(bash -c 'type -p')" ]; then + send_udp6() { + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + B ip addr add "${src_addr6}" dev veth_b nodad + B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}" + ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null + } + else + return 1 + fi +} + +# Set up function to send TCP traffic on IPv4 +setup_flood_tcp() { + if command -v iperf3 >/dev/null; then + flood_tcp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add 
"${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b 2>/dev/null + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \ + ${src_addr4} -l16 -t 1000 + + src_addr4= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_tcp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 2>/dev/null + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_addr4="${src_addr4}:${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \ + -l20 -t 1000 + + src_addr4= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_tcp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="10.0.0.2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -4 ${dst_port} -L "${dst_addr4}" \ + >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + 
B netperf -4 -H "${dst_addr4}" ${dst_port} \ + -L "${src_addr4}" -l 1000 -t TCP_STREAM + + src_addr4= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Set up function to send TCP traffic on IPv6 +setup_flood_tcp6() { + if command -v iperf3 >/dev/null; then + flood_tcp6() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + src_addr6="-B ${src_addr6}" + else + src_addr6="-B 2001:db8::2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -c "${dst_addr6}" ${dst_port} \ + ${src_port} ${src_addr6} -l16 -t 1000 + + src_addr6= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_tcp6() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + src_addr6="-B ${src_addr6}" + else + src_addr6="-B 2001:db8::2" + fi + if [ -n "${src_port}" ]; then + src_addr6="${src_addr6}:${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf -c "${dst_addr6}" -V ${dst_port} \ + ${src_addr6} -l1 -t 1000 + + src_addr6= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_tcp6() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr6}" ]; then + B ip addr add "${src_addr6}" dev veth_b nodad + else + src_addr6="2001:db8::2" + fi + if [ -n "${src_port}" ]; then + 
dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip -6 addr add "${dst_addr6}" dev veth_a nodad \ + 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -6 ${dst_port} -L "${dst_addr6}" \ + >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B netperf -6 -H "${dst_addr6}" ${dst_port} \ + -L "${src_addr6}" -l 1000 -t TCP_STREAM + + src_addr6= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Set up function to send UDP traffic on IPv4 +setup_flood_udp() { + if command -v iperf3 >/dev/null; then + flood_udp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 2>/dev/null + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_port="--cport ${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf3 -s -DB "${dst_addr4}" ${dst_port} + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \ + ${dst_port} ${src_port} ${src_addr4} + + src_addr4= + src_port= + dst_port= + } + elif command -v iperf >/dev/null; then + flood_udp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + src_addr4="-B ${src_addr4}" + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="-B 10.0.0.2" + fi + if [ -n "${src_port}" ]; then + src_addr4="${src_addr4}:${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B iperf -u -c 
"${dst_addr4}" -b 100M -l1 -t1000 \ + ${dst_port} ${src_addr4} + + src_addr4= + src_port= + dst_port= + } + elif command -v netperf >/dev/null; then + flood_udp() { + [ -n "${dst_port}" ] && dst_port="-p ${dst_port}" + if [ -n "${src_addr4}" ]; then + B ip addr add "${src_addr4}/16" dev veth_b + else + B ip addr add dev veth_b 10.0.0.2 + src_addr4="10.0.0.2" + fi + if [ -n "${src_port}" ]; then + dst_port="${dst_port},${src_port}" + fi + B ip route add default dev veth_b + ip addr add "${dst_addr4}" dev veth_a 2>/dev/null + + # shellcheck disable=SC2086 # this needs split options + netserver -4 ${dst_port} -L "${dst_addr4}" \ + >/dev/null 2>&1 + sleep 2 + + # shellcheck disable=SC2086 # this needs split options + B netperf -4 -H "${dst_addr4}" ${dst_port} \ + -L "${src_addr4}" -l 1000 -t UDP_STREAM + + src_addr4= + src_port= + dst_port= + } + else + return 1 + fi +} + +# Find pktgen script and set up function to start pktgen injection +setup_perf() { + for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do + command -v "${pktgen_script_path}" >/dev/null && break + done + [ "${pktgen_script_path}" = "__notfound" ] && return 1 + + perf_ipv4() { + ${pktgen_script_path} -s80 \ + -i veth_a -d "${dst_addr4}" -p "${dst_port}" \ + -m "${dst_mac}" \ + -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null & + perf_pid=$! + } + perf_ipv6() { + IP6=6 ${pktgen_script_path} -s100 \ + -i veth_a -d "${dst_addr6}" -p "${dst_port}" \ + -m "${dst_mac}" \ + -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null & + perf_pid=$! 
+ } +} + +# Clean up before each test +cleanup() { + nft reset counter inet filter test >/dev/null 2>&1 + nft flush ruleset >/dev/null 2>&1 + ip link del dummy0 2>/dev/null + ip route del default 2>/dev/null + ip -6 route del default 2>/dev/null + ip netns del B 2>/dev/null + ip link del veth_a 2>/dev/null + timeout= + killall iperf3 2>/dev/null + killall iperf 2>/dev/null + killall netperf 2>/dev/null + killall netserver 2>/dev/null + rm -f ${tmp} + sleep 2 +} + +# Entry point for setup functions +setup() { + if [ "$(id -u)" -ne 0 ]; then + echo " need to run as root" + exit ${KSELFTEST_SKIP} + fi + + cleanup + check_tools || return 1 + for arg do + if ! eval setup_"${arg}"; then + err " ${arg} not supported" + return 1 + fi + done +} + +# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it +format_addr4() { + a=$((${1} + 16777216 * 10 + 5)) + printf "%i.%i.%i.%i" \ + "$((a / 16777216))" "$((a % 16777216 / 65536))" \ + "$((a % 65536 / 256))" "$((a % 256))" +} + +# Format integer into IPv6 address, summing 2001:db8:: to it +format_addr6() { + printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))" +} + +# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it +format_mac() { + printf "00:01:%02x:%02x:%02x:%02x" \ + "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))" \ + "$((${1} % 65536 / 256))" "$((${1} % 256))" +} + +# Format integer into port, avoid 0 port +format_port() { + printf "%i" "$((${1} % 65534 + 1))" +} + +# Drop suffixed '6' from L4 protocol, if any +format_proto() { + printf "%s" "${proto}" | tr -d 6 +} + +# Format destination and source fields into nft concatenated type +format() { + __start= + __end= + __expr="{ " + + for f in ${dst}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . 
" + + __start="$(eval format_"${f}" "${start}")" + __end="$(eval format_"${f}" "${end}")" + + if [ "${f}" = "proto" ]; then + __expr="${__expr}${__start}" + else + __expr="${__expr}${__start}-${__end}" + fi + done + for f in ${src}; do + __expr="${__expr} . " + __start="$(eval format_"${f}" "${srcstart}")" + __end="$(eval format_"${f}" "${srcend}")" + + if [ "${f}" = "proto" ]; then + __expr="${__expr}${__start}" + else + __expr="${__expr}${__start}-${__end}" + fi + done + + if [ -n "${timeout}" ]; then + echo "${__expr} timeout ${timeout}s }" + else + echo "${__expr} }" + fi +} + +# Format destination and source fields into nft type, start element only +format_norange() { + __expr="{ " + + for f in ${dst}; do + [ "${__expr}" != "{ " ] && __expr="${__expr} . " + + __expr="${__expr}$(eval format_"${f}" "${start}")" + done + for f in ${src}; do + __expr="${__expr} . $(eval format_"${f}" "${start}")" + done + + echo "${__expr} }" +} + +# Format first destination field into nft type +format_noconcat() { + for f in ${dst}; do + __start="$(eval format_"${f}" "${start}")" + __end="$(eval format_"${f}" "${end}")" + + if [ "${f}" = "proto" ]; then + echo "{ ${__start} }" + else + echo "{ ${__start}-${__end} }" + fi + return + done +} + +# Add single entry to 'test' set in 'inet filter' table +add() { + if ! nft add element inet filter test "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Format and output entries for sets in 'netdev perf' table +add_perf() { + if [ "${1}" = "test" ]; then + echo "add element netdev perf test $(format)" + elif [ "${1}" = "norange" ]; then + echo "add element netdev perf norange $(format_norange)" + elif [ "${1}" = "noconcat" ]; then + echo "add element netdev perf noconcat $(format_noconcat)" + fi +} + +# Add single entry to 'norange' set in 'netdev perf' table +add_perf_norange() { + if ! 
nft add element netdev perf norange "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Add single entry to 'noconcat' set in 'netdev perf' table +add_perf_noconcat() { + if ! nft add element netdev perf noconcat "${1}"; then + err "Failed to add ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Delete single entry from set +del() { + if ! nft delete element inet filter test "${1}"; then + err "Failed to delete ${1} given ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Return packet count from 'test' counter in 'inet filter' table +count_packets() { + found=0 + for token in $(nft list counter inet filter test); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Return packet count from 'test' counter in 'netdev perf' table +count_perf_packets() { + found=0 + for token in $(nft list counter netdev perf test); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Set MAC addresses, send traffic according to specifier +flood() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval flood_\$proto +} + +# Set MAC addresses, start pktgen injection +perf() { + dst_mac="$(format_mac "${1}")" + ip link set veth_a address "${dst_mac}" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval perf_\$perf_proto +} + +# Set MAC addresses, send single packet, check that it matches, reset counter +send_match() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + 
done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval send_\$proto + if [ "$(count_packets)" != "1" ]; then + err "${proto} packet to:" + err " $(for f in ${dst}; do + eval format_\$f "${1}"; printf ' '; done)" + err "from:" + err " $(for f in ${src}; do + eval format_\$f "${2}"; printf ' '; done)" + err "should have matched ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi + nft reset counter inet filter test >/dev/null +} + +# Set MAC addresses, send single packet, check that it doesn't match +send_nomatch() { + ip link set veth_a address "$(format_mac "${1}")" + ip -n B link set veth_b address "$(format_mac "${2}")" + + for f in ${dst}; do + eval dst_"$f"=\$\(format_\$f "${1}"\) + done + for f in ${src}; do + eval src_"$f"=\$\(format_\$f "${2}"\) + done + eval send_\$proto + if [ "$(count_packets)" != "0" ]; then + err "${proto} packet to:" + err " $(for f in ${dst}; do + eval format_\$f "${1}"; printf ' '; done)" + err "from:" + err " $(for f in ${src}; do + eval format_\$f "${2}"; printf ' '; done)" + err "should not have matched ruleset:" + err "$(nft list ruleset -a)" + return 1 + fi +} + +# Correctness test template: +# - add ranged element, check that packets match it +# - check that packets outside range don't match it +# - remove some elements, check that packets don't match anymore +test_correctness() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + + # Delete elements now and then + if [ $((i 
% 3)) -eq 0 ]; then + del "$(format)" || return 1 + for j in $(seq ${start} \ + $((range_size / 2 + 1)) ${end}); do + send_nomatch "${j}" $((j + src_delta)) \ + || return 1 + done + fi + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done +} + +# Concurrency test template: +# - add all the elements +# - start a thread for each physical thread that: +# - adds all the elements +# - flushes the set +# - adds all the elements +# - flushes the entire ruleset +# - adds the set back +# - adds all the elements +# - delete all the elements +test_concurrency() { + proto=${flood_proto} + tools=${flood_tools} + chain_spec=${flood_spec} + setup veth flood_"${proto}" set || return ${KSELFTEST_SKIP} + + range_size=1 + cstart=${start} + flood_pids= + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + + flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!" 
+ + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + sleep 10 + + pids= + for c in $(seq 1 "$(nproc)"); do ( + for r in $(seq 1 "${race_repeat}"); do + range_size=1 + + # $start needs to be local to this subshell + # shellcheck disable=SC2030 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush inet filter test 2>/dev/null + + range_size=1 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset + setup set 2>/dev/null + + range_size=1 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + range_size=1 + start=${cstart} + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + del "$(format)" 2>/dev/null + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + done + ) & pids="${pids} $!" 
+ done + + # shellcheck disable=SC2046,SC2086 # word splitting wanted here + wait $(for pid in ${pids}; do echo ${pid}; done) + # shellcheck disable=SC2046,SC2086 + kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null + # shellcheck disable=SC2046,SC2086 + wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null + + return 0 +} + +# Timeout test template: +# - add all the elements with 3s timeout while checking that packets match +# - wait 3s after the last insertion, check that packets don't match any entry +test_timeout() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + + timeout=3 + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + sleep 3 + for i in $(seq ${start} $((start + count))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_nomatch "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done +} + +# Performance test template: +# - add concatenated ranged entries +# - add non-ranged concatenated entries (for hash set matching rate baseline) +# - add ranged entries with first field only (for rbhash baseline) +# - start pktgen injection directly on device rx path of this namespace +# - measure drop only rate, hash and rbtree baselines, then matching rate +test_performance() { + chain_spec=${perf_spec} + dst="${perf_dst}" + src="${perf_src}" + setup veth perf set || return ${KSELFTEST_SKIP} + + first=${start} + range_size=1 + for set in test norange noconcat; do + start=${first} + for i in $(seq ${start} $((start + 
perf_entries))); do + end=$((start + range_size)) + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + elif [ ${start} -eq ${end} ]; then + end=$((start + 1)) + fi + + add_perf ${set} + + start=$((end + range_size)) + done > "${tmp}" + nft -f "${tmp}" + done + + perf $((end - 1)) ${srcstart} + + sleep 2 + + nft add rule netdev perf test counter name \"test\" drop + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline (drop from netdev hook): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec} @norange \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline hash (non-ranged entries): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec%%. 
*} @noconcat \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + info " baseline rbtree (match on first field only): ${pps}pps" + handle="$(nft -a list chain netdev perf test | grep counter)" + handle="${handle##* }" + nft delete rule netdev perf test handle "${handle}" + + nft add rule "netdev perf test ${chain_spec} @test \ + counter name \"test\" drop" + nft reset counter netdev perf test >/dev/null 2>&1 + sleep "${perf_duration}" + pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))" + p5="$(printf %5s "${perf_entries}")" + info " set with ${p5} full, ranged entries: ${pps}pps" + kill "${perf_pid}" +} + +# Run everything in a separate network namespace +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } +tmp="$(mktemp)" +trap cleanup EXIT + +# Entry point for test runs +passed=0 +for name in ${TESTS}; do + printf "TEST: %s\n" "${name}" + for type in ${TYPES}; do + eval desc=\$TYPE_"${type}" + IFS=' +' + for __line in ${desc}; do + # shellcheck disable=SC2086 + eval ${__line%% *}=\"${__line##* }\"; + done + IFS=' +' + + if [ "${name}" = "concurrency" ] && \ + [ "${race_repeat}" = "0" ]; then + continue + fi + if [ "${name}" = "performance" ] && \ + [ "${perf_duration}" = "0" ]; then + continue + fi + + printf " %-60s " "${display}" + eval test_"${name}" + ret=$? 
+ + if [ $ret -eq 0 ]; then + printf "[ OK ]\n" + info_flush + passed=$((passed + 1)) + elif [ $ret -eq 1 ]; then + printf "[FAIL]\n" + err_flush + exit 1 + elif [ $ret -eq ${KSELFTEST_SKIP} ]; then + printf "[SKIP]\n" + err_flush + fi + done +done + +[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 16571ac1dab4..d3e0809ab368 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -226,17 +226,19 @@ check_transfer() return 0 } -test_tcp_forwarding() +test_tcp_forwarding_ip() { local nsa=$1 local nsb=$2 + local dstip=$3 + local dstport=$4 local lret=0 ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & lpid=$! sleep 1 - ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" & + ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" & cpid=$! sleep 3 @@ -258,6 +260,28 @@ test_tcp_forwarding() return $lret } +test_tcp_forwarding() +{ + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + + return $? +} + +test_tcp_forwarding_nat() +{ + local lret + + test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 + lret=$? + + if [ $lret -eq 0 ] ; then + test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 + lret=$? + fi + + return $lret +} + make_file "$ns1in" "ns1" make_file "$ns2in" "ns2" @@ -283,14 +307,19 @@ ip -net ns2 route add 192.168.10.1 via 10.0.2.1 # Same, but with NAT enabled. ip netns exec nsr1 nft -f - <<EOF table ip nat { + chain prerouting { + type nat hook prerouting priority 0; policy accept; + meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345 + } + chain postrouting { type nat hook postrouting priority 0; policy accept; - meta oifname "veth1" masquerade + meta oifname "veth1" counter masquerade } } EOF -test_tcp_forwarding ns1 ns2 +test_tcp_forwarding_nat ns1 ns2 if [ $? 
-eq 0 ] ;then echo "PASS: flow offloaded for ns1/ns2 with NAT" @@ -313,7 +342,7 @@ fi ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -test_tcp_forwarding ns1 ns2 +test_tcp_forwarding_nat ns1 ns2 if [ $? -eq 0 ] ;then echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" else diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 1be55e705780..d7e07f4c3d7f 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -8,9 +8,14 @@ ksft_skip=4 ret=0 test_inet_nat=true +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" +ns2="ns2-$sfx" + cleanup() { - for i in 0 1 2; do ip netns del ns$i;done + for i in 0 1 2; do ip netns del ns$i-"$sfx";done } nft --version > /dev/null 2>&1 @@ -25,40 +30,49 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -ip netns add ns0 +ip netns add "$ns0" if [ $? -ne 0 ];then - echo "SKIP: Could not create net namespace" + echo "SKIP: Could not create net namespace $ns0" exit $ksft_skip fi trap cleanup EXIT -ip netns add ns1 -ip netns add ns2 +ip netns add "$ns1" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns1" + exit $ksft_skip +fi + +ip netns add "$ns2" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns2" + exit $ksft_skip +fi -ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1 +ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1 if [ $? 
-ne 0 ];then echo "SKIP: No virtual ethernet pair device support in kernel" exit $ksft_skip fi -ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 +ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2" -ip -net ns0 link set lo up -ip -net ns0 link set veth0 up -ip -net ns0 addr add 10.0.1.1/24 dev veth0 -ip -net ns0 addr add dead:1::1/64 dev veth0 +ip -net "$ns0" link set lo up +ip -net "$ns0" link set veth0 up +ip -net "$ns0" addr add 10.0.1.1/24 dev veth0 +ip -net "$ns0" addr add dead:1::1/64 dev veth0 -ip -net ns0 link set veth1 up -ip -net ns0 addr add 10.0.2.1/24 dev veth1 -ip -net ns0 addr add dead:2::1/64 dev veth1 +ip -net "$ns0" link set veth1 up +ip -net "$ns0" addr add 10.0.2.1/24 dev veth1 +ip -net "$ns0" addr add dead:2::1/64 dev veth1 for i in 1 2; do - ip -net ns$i link set lo up - ip -net ns$i link set eth0 up - ip -net ns$i addr add 10.0.$i.99/24 dev eth0 - ip -net ns$i route add default via 10.0.$i.1 - ip -net ns$i addr add dead:$i::99/64 dev eth0 - ip -net ns$i route add default via dead:$i::1 + ip -net ns$i-$sfx link set lo up + ip -net ns$i-$sfx link set eth0 up + ip -net ns$i-$sfx addr add 10.0.$i.99/24 dev eth0 + ip -net ns$i-$sfx route add default via 10.0.$i.1 + ip -net ns$i-$sfx addr add dead:$i::99/64 dev eth0 + ip -net ns$i-$sfx route add default via dead:$i::1 done bad_counter() @@ -66,8 +80,9 @@ bad_counter() local ns=$1 local counter=$2 local expect=$3 + local tag=$4 - echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2 + echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2 ip netns exec $ns nft list counter inet filter $counter 1>&2 } @@ -78,24 +93,24 @@ check_counters() cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84") if [ $? 
-ne 0 ]; then - bad_counter $ns ns0in "packets 1 bytes 84" + bad_counter $ns ns0in "packets 1 bytes 84" "check_counters 1" lret=1 fi cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84") if [ $? -ne 0 ]; then - bad_counter $ns ns0out "packets 1 bytes 84" + bad_counter $ns ns0out "packets 1 bytes 84" "check_counters 2" lret=1 fi expect="packets 1 bytes 104" cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter $ns ns0in6 "$expect" + bad_counter $ns ns0in6 "$expect" "check_counters 3" lret=1 fi cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter $ns ns0out6 "$expect" + bad_counter $ns ns0out6 "$expect" "check_counters 4" lret=1 fi @@ -107,41 +122,41 @@ check_ns0_counters() local ns=$1 local lret=0 - cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0") if [ $? -ne 0 ]; then - bad_counter ns0 ns0in "packets 0 bytes 0" + bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1" lret=1 fi - cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0") if [ $? -ne 0 ]; then - bad_counter ns0 ns0in6 "packets 0 bytes 0" + bad_counter "$ns0" ns0in6 "packets 0 bytes 0" lret=1 fi - cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0") if [ $? 
-ne 0 ]; then - bad_counter ns0 ns0out "packets 0 bytes 0" + bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2" lret=1 fi - cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0") if [ $? -ne 0 ]; then - bad_counter ns0 ns0out6 "packets 0 bytes 0" + bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 " lret=1 fi for dir in "in" "out" ; do expect="packets 1 bytes 84" - cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 $ns$dir "$expect" + bad_counter "$ns0" $ns$dir "$expect" "check_ns0_counters 4" lret=1 fi expect="packets 1 bytes 104" - cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir}6 | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 $ns$dir6 "$expect" + bad_counter "$ns0" $ns$dir6 "$expect" "check_ns0_counters 5" lret=1 fi done @@ -152,7 +167,7 @@ check_ns0_counters() reset_counters() { for i in 0 1 2;do - ip netns exec ns$i nft reset counters inet > /dev/null + ip netns exec ns$i-$sfx nft reset counters inet > /dev/null done } @@ -166,7 +181,7 @@ test_local_dnat6() IPF="ip6" fi -ip netns exec ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f /dev/stdin <<EOF table $family nat { chain output { type nat hook output priority 0; policy accept; @@ -180,7 +195,7 @@ EOF fi # ping netns1, expect rewrite to netns2 - ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null + ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null if [ $? 
-ne 0 ]; then lret=1 echo "ERROR: ping6 failed" @@ -189,18 +204,18 @@ EOF expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 ns1$dir "$expect" + bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1" lret=1 fi done expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 ns2$dir "$expect" + bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2" lret=1 fi done @@ -208,9 +223,9 @@ EOF # expect 0 count in ns1 expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3" lret=1 fi done @@ -218,15 +233,15 @@ EOF # expect 1 packet in ns2 expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns2 ns0$dir "$expect" + bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4" lret=1 fi done - test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was $family NATted to ns2" - ip netns exec ns0 nft flush chain ip6 nat output + test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2" + ip netns exec "$ns0" nft flush chain ip6 nat output return $lret } @@ -241,7 +256,7 @@ test_local_dnat() IPF="ip" fi -ip netns exec ns0 nft -f - <<EOF 2>/dev/null +ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null table $family nat { chain output { type nat hook output priority 0; policy accept; @@ -260,7 +275,7 @@ EOF fi # ping netns1, expect rewrite to netns2 - ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null if [ $? -ne 0 ]; then lret=1 echo "ERROR: ping failed" @@ -269,18 +284,18 @@ EOF expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 ns1$dir "$expect" + bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat 1" lret=1 fi done expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 ns2$dir "$expect" + bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 2" lret=1 fi done @@ -288,9 +303,9 @@ EOF # expect 0 count in ns1 expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat 3" lret=1 fi done @@ -298,19 +313,19 @@ EOF # expect 1 packet in ns2 expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns0$dir "$expect" + bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 4" lret=1 fi done - test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2" + test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2" - ip netns exec ns0 nft flush chain $family nat output + ip netns exec "$ns0" nft flush chain $family nat output reset_counters - ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null + ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null if [ $? -ne 0 ]; then lret=1 echo "ERROR: ping failed" @@ -319,17 +334,17 @@ EOF expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns1$dir "$expect" + bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5" lret=1 fi done expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns0 ns2$dir "$expect" + bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6" lret=1 fi done @@ -337,9 +352,9 @@ EOF # expect 1 count in ns1 expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns0 ns0$dir "$expect" + bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7" lret=1 fi done @@ -347,14 +362,14 @@ EOF # expect 0 packet in ns2 expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns2$dir "$expect" + bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8" lret=1 fi done - test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush" + test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush" return $lret } @@ -366,26 +381,26 @@ test_masquerade6() local natflags=$2 local lret=0 - ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" return 1 lret=1 fi expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns1 ns2$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_masquerade6 1" lret=1 fi - cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 2" lret=1 fi done @@ -393,7 +408,7 @@ test_masquerade6() reset_counters # add masquerading rule -ip netns exec ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f /dev/stdin <<EOF table $family nat { chain postrouting { type nat hook postrouting priority 0; policy accept; @@ -406,24 +421,24 @@ EOF return $ksft_skip fi - ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags" + echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags" lret=1 fi # ns1 should have seen packets from ns0, due to masquerade expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3" lret=1 fi - cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4" lret=1 fi done @@ -431,32 +446,32 @@ EOF # ns1 should not have seen packets from ns2, due to masquerade expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5" lret=1 fi - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns0" ns1$dir "$expect" "test_masquerade6 6" lret=1 fi done - ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)" + echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec ns0 nft flush chain $family nat postrouting + ip netns exec "$ns0" nft flush chain $family nat postrouting if [ $? 
-ne 0 ]; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for ns2" + test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2" return $lret } @@ -467,26 +482,26 @@ test_masquerade() local natflags=$2 local lret=0 - ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null - ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 $natflags" + echo "ERROR: cannot ping $ns1 from "$ns2" $natflags" lret=1 fi expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns2$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 1" lret=1 fi - cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 2" lret=1 fi done @@ -494,7 +509,7 @@ test_masquerade() reset_counters # add masquerading rule -ip netns exec ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f /dev/stdin <<EOF table $family nat { chain postrouting { type nat hook postrouting priority 0; policy accept; @@ -507,24 +522,24 @@ EOF return $ksft_skip fi - ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags" + echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags" lret=1 fi # ns1 should have seen packets from ns0, due to masquerade expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 3" lret=1 fi - cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 4" lret=1 fi done @@ -532,32 +547,32 @@ EOF # ns1 should not have seen packets from ns2, due to masquerade expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 5" lret=1 fi - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns0" ns1$dir "$expect" "test_masquerade 6" lret=1 fi done - ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)" + echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)" lret=1 fi - ip netns exec ns0 nft flush chain $family nat postrouting + ip netns exec "$ns0" nft flush chain $family nat postrouting if [ $? -ne 0 ]; then echo "ERROR: Could not flush $family nat postrouting" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for ns2" + test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2" return $lret } @@ -567,25 +582,25 @@ test_redirect6() local family=$1 local lret=0 - ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null - ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannnot ping ns1 from ns2 via ipv6" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6" lret=1 fi expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns1 ns2$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1" lret=1 fi - cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2" lret=1 fi done @@ -593,7 +608,7 @@ test_redirect6() reset_counters # add redirect rule -ip netns exec ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f /dev/stdin <<EOF table $family nat { chain prerouting { type nat hook prerouting priority 0; policy accept; @@ -606,18 +621,18 @@ EOF return $ksft_skip fi - ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect" + echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect" lret=1 fi # ns1 should have seen no packets from ns2, due to redirection expect="packets 0 bytes 0" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 3" lret=1 fi done @@ -625,20 +640,20 @@ EOF # ns0 should have seen packets from ns2, due to masquerade expect="packets 1 bytes 104" for dir in "in6" "out6" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns0" ns2$dir "$expect" "test_redirect6 4" lret=1 fi done - ip netns exec ns0 nft delete table $family nat + ip netns exec "$ns0" nft delete table $family nat if [ $? -ne 0 ]; then echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2" + test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2" return $lret } @@ -648,26 +663,26 @@ test_redirect() local family=$1 local lret=0 - ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null - ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null - ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2" + echo "ERROR: cannot ping $ns1 from $ns2" lret=1 fi expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns2$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_redirect 1" lret=1 fi - cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") if [ $? 
-ne 0 ]; then - bad_counter ns2 ns1$dir "$expect" + bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2" lret=1 fi done @@ -675,7 +690,7 @@ test_redirect() reset_counters # add redirect rule -ip netns exec ns0 nft -f - <<EOF +ip netns exec "$ns0" nft -f /dev/stdin <<EOF table $family nat { chain prerouting { type nat hook prerouting priority 0; policy accept; @@ -688,9 +703,9 @@ EOF return $ksft_skip fi - ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 if [ $? -ne 0 ] ; then - echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect" + echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect" lret=1 fi @@ -698,9 +713,9 @@ EOF expect="packets 0 bytes 0" for dir in "in" "out" ; do - cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns1" ns2$dir "$expect" "test_redirect 3" lret=1 fi done @@ -708,28 +723,28 @@ EOF # ns0 should have seen packets from ns2, due to masquerade expect="packets 1 bytes 84" for dir in "in" "out" ; do - cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect") + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect") if [ $? -ne 0 ]; then - bad_counter ns1 ns0$dir "$expect" + bad_counter "$ns0" ns2$dir "$expect" "test_redirect 4" lret=1 fi done - ip netns exec ns0 nft delete table $family nat + ip netns exec "$ns0" nft delete table $family nat if [ $? 
-ne 0 ]; then echo "ERROR: Could not delete $family nat table" 1>&2 lret=1 fi - test $lret -eq 0 && echo "PASS: $family IP redirection for ns2" + test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2" return $lret } -# ip netns exec ns0 ping -c 1 -q 10.0.$i.99 +# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do -ip netns exec ns$i nft -f - <<EOF +ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF table inet filter { counter ns0in {} counter ns1in {} @@ -796,18 +811,18 @@ done sleep 3 # test basic connectivity for i in 1 2; do - ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null + ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 > /dev/null if [ $? -ne 0 ];then echo "ERROR: Could not reach other namespace(s)" 1>&2 ret=1 fi - ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null + ip netns exec "$ns0" ping -c 1 -q dead:$i::99 > /dev/null if [ $? -ne 0 ];then echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2 ret=1 fi - check_counters ns$i + check_counters ns$i-$sfx if [ $? 
-ne 0 ]; then ret=1 fi @@ -820,7 +835,7 @@ for i in 1 2; do done if [ $ret -eq 0 ];then - echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2" + echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2" fi reset_counters @@ -846,4 +861,9 @@ reset_counters $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet +if [ $ret -ne 0 ];then + echo -n "FAIL: " + nft --version +fi + exit $ret diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/openat2/.gitignore new file mode 100644 index 000000000000..bd68f6c3fd07 --- /dev/null +++ b/tools/testing/selftests/openat2/.gitignore @@ -0,0 +1 @@ +/*_test diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile new file mode 100644 index 000000000000..4b93b1417b86 --- /dev/null +++ b/tools/testing/selftests/openat2/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined +TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test + +include ../lib.mk + +$(TEST_GEN_PROGS): helpers.c diff --git a/tools/testing/selftests/openat2/helpers.c b/tools/testing/selftests/openat2/helpers.c new file mode 100644 index 000000000000..e9a6557ab16f --- /dev/null +++ b/tools/testing/selftests/openat2/helpers.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. + */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <string.h> +#include <syscall.h> +#include <limits.h> + +#include "helpers.h" + +bool needs_openat2(const struct open_how *how) +{ + return how->resolve != 0; +} + +int raw_openat2(int dfd, const char *path, void *how, size_t size) +{ + int ret = syscall(__NR_openat2, dfd, path, how, size); + return ret >= 0 ? 
ret : -errno; +} + +int sys_openat2(int dfd, const char *path, struct open_how *how) +{ + return raw_openat2(dfd, path, how, sizeof(*how)); +} + +int sys_openat(int dfd, const char *path, struct open_how *how) +{ + int ret = openat(dfd, path, how->flags, how->mode); + return ret >= 0 ? ret : -errno; +} + +int sys_renameat2(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, unsigned int flags) +{ + int ret = syscall(__NR_renameat2, olddirfd, oldpath, + newdirfd, newpath, flags); + return ret >= 0 ? ret : -errno; +} + +int touchat(int dfd, const char *path) +{ + int fd = openat(dfd, path, O_CREAT, 0700); /* O_CREAT requires an explicit mode argument */ + if (fd >= 0) + close(fd); + return fd; +} + +char *fdreadlink(int fd) +{ + char *target, *tmp; + + E_asprintf(&tmp, "/proc/self/fd/%d", fd); + + target = malloc(PATH_MAX); + if (!target) + ksft_exit_fail_msg("fdreadlink: malloc failed\n"); + memset(target, 0, PATH_MAX); + + E_readlink(tmp, target, PATH_MAX - 1); /* readlink(2) does not NUL-terminate; keep the last zeroed byte */ + free(tmp); + return target; +} + +bool fdequal(int fd, int dfd, const char *path) +{ + char *fdpath, *dfdpath, *other; + bool cmp; + + fdpath = fdreadlink(fd); + dfdpath = fdreadlink(dfd); + + if (!path) + E_asprintf(&other, "%s", dfdpath); + else if (*path == '/') + E_asprintf(&other, "%s", path); + else + E_asprintf(&other, "%s/%s", dfdpath, path); + + cmp = !strcmp(fdpath, other); + + free(fdpath); + free(dfdpath); + free(other); + return cmp; +} + +bool openat2_supported = false; + +void __attribute__((constructor)) init(void) +{ + struct open_how how = {}; + int fd; + + BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_VER0); + + /* Check openat2(2) support. 
*/ + fd = sys_openat2(AT_FDCWD, ".", &how); + openat2_supported = (fd >= 0); + + if (fd >= 0) + close(fd); +} diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h new file mode 100644 index 000000000000..a6ea27344db2 --- /dev/null +++ b/tools/testing/selftests/openat2/helpers.h @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. + */ + +#ifndef __RESOLVEAT_H__ +#define __RESOLVEAT_H__ + +#define _GNU_SOURCE +#include <stdint.h> +#include <errno.h> +#include <linux/types.h> +#include "../kselftest.h" + +#define ARRAY_LEN(X) (sizeof (X) / sizeof (*(X))) +#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); }))) + +#ifndef SYS_openat2 +#ifndef __NR_openat2 +#define __NR_openat2 437 +#endif /* __NR_openat2 */ +#define SYS_openat2 __NR_openat2 +#endif /* SYS_openat2 */ + +/* + * Arguments for how openat2(2) should open the target path. If @resolve is + * zero, then openat2(2) operates very similarly to openat(2). + * + * However, unlike openat(2), unknown bits in @flags result in -EINVAL rather + * than being silently ignored. @mode must be zero unless one of {O_CREAT, + * O_TMPFILE} are set. + * + * @flags: O_* flags. + * @mode: O_CREAT/O_TMPFILE file mode. + * @resolve: RESOLVE_* flags. + */ +struct open_how { + __u64 flags; + __u64 mode; + __u64 resolve; +}; + +#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */ +#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0 + +bool needs_openat2(const struct open_how *how); + +#ifndef RESOLVE_IN_ROOT +/* how->resolve flags for openat2(2). */ +#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings + (includes bind-mounts). */ +#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style + "magic-links". 
*/ +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks + (implies RESOLVE_NO_MAGICLINKS) */ +#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like + "..", symlinks, and absolute + paths which escape the dirfd. */ +#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." + be scoped inside the dirfd + (similar to chroot(2)). */ +#endif /* RESOLVE_IN_ROOT */ + +#define E_func(func, ...) \ + do { \ + if (func(__VA_ARGS__) < 0) \ + ksft_exit_fail_msg("%s:%d %s failed\n", \ + __FILE__, __LINE__, #func);\ + } while (0) + +#define E_asprintf(...) E_func(asprintf, __VA_ARGS__) +#define E_chmod(...) E_func(chmod, __VA_ARGS__) +#define E_dup2(...) E_func(dup2, __VA_ARGS__) +#define E_fchdir(...) E_func(fchdir, __VA_ARGS__) +#define E_fstatat(...) E_func(fstatat, __VA_ARGS__) +#define E_kill(...) E_func(kill, __VA_ARGS__) +#define E_mkdirat(...) E_func(mkdirat, __VA_ARGS__) +#define E_mount(...) E_func(mount, __VA_ARGS__) +#define E_prctl(...) E_func(prctl, __VA_ARGS__) +#define E_readlink(...) E_func(readlink, __VA_ARGS__) +#define E_setresuid(...) E_func(setresuid, __VA_ARGS__) +#define E_symlinkat(...) E_func(symlinkat, __VA_ARGS__) +#define E_touchat(...) E_func(touchat, __VA_ARGS__) +#define E_unshare(...) E_func(unshare, __VA_ARGS__) + +#define E_assert(expr, msg, ...) 
\ + do { \ + if (!(expr)) \ + ksft_exit_fail_msg("ASSERT(%s:%d) failed (%s): " msg "\n", \ + __FILE__, __LINE__, #expr, ##__VA_ARGS__); \ + } while (0) + +int raw_openat2(int dfd, const char *path, void *how, size_t size); +int sys_openat2(int dfd, const char *path, struct open_how *how); +int sys_openat(int dfd, const char *path, struct open_how *how); +int sys_renameat2(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, unsigned int flags); + +int touchat(int dfd, const char *path); +char *fdreadlink(int fd); +bool fdequal(int fd, int dfd, const char *path); + +extern bool openat2_supported; + +#endif /* __RESOLVEAT_H__ */ diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c new file mode 100644 index 000000000000..b386367c606b --- /dev/null +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +#include "../kselftest.h" +#include "helpers.h" + +/* + * O_LARGEFILE is set to 0 by glibc. + * XXX: This is wrong on {mips, parisc, powerpc, sparc}. + */ +#undef O_LARGEFILE +#define O_LARGEFILE 0x8000 + +struct open_how_ext { + struct open_how inner; + uint32_t extra1; + char pad1[128]; + uint32_t extra2; + char pad2[128]; + uint32_t extra3; +}; + +struct struct_test { + const char *name; + struct open_how_ext arg; + size_t size; + int err; +}; + +#define NUM_OPENAT2_STRUCT_TESTS 7 +#define NUM_OPENAT2_STRUCT_VARIATIONS 13 + +void test_openat2_struct(void) +{ + int misalignments[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 17, 87 }; + + struct struct_test tests[] = { + /* Normal struct. 
*/ + { .name = "normal struct", + .arg.inner.flags = O_RDONLY, + .size = sizeof(struct open_how) }, + /* Bigger struct, with zeroed out end. */ + { .name = "bigger struct (zeroed out)", + .arg.inner.flags = O_RDONLY, + .size = sizeof(struct open_how_ext) }, + + /* TODO: Once expanded, check zero-padding. */ + + /* Smaller than version-0 struct. */ + { .name = "zero-sized 'struct'", + .arg.inner.flags = O_RDONLY, .size = 0, .err = -EINVAL }, + { .name = "smaller-than-v0 struct", + .arg.inner.flags = O_RDONLY, + .size = OPEN_HOW_SIZE_VER0 - 1, .err = -EINVAL }, + + /* Bigger struct, with non-zero trailing bytes. */ + { .name = "bigger struct (non-zero data in first 'future field')", + .arg.inner.flags = O_RDONLY, .arg.extra1 = 0xdeadbeef, + .size = sizeof(struct open_how_ext), .err = -E2BIG }, + { .name = "bigger struct (non-zero data in middle of 'future fields')", + .arg.inner.flags = O_RDONLY, .arg.extra2 = 0xfeedcafe, + .size = sizeof(struct open_how_ext), .err = -E2BIG }, + { .name = "bigger struct (non-zero data at end of 'future fields')", + .arg.inner.flags = O_RDONLY, .arg.extra3 = 0xabad1dea, + .size = sizeof(struct open_how_ext), .err = -E2BIG }, + }; + + BUILD_BUG_ON(ARRAY_LEN(misalignments) != NUM_OPENAT2_STRUCT_VARIATIONS); + BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_STRUCT_TESTS); + + for (int i = 0; i < ARRAY_LEN(tests); i++) { + struct struct_test *test = &tests[i]; + struct open_how_ext how_ext = test->arg; + + for (int j = 0; j < ARRAY_LEN(misalignments); j++) { + int fd, misalign = misalignments[j]; + char *fdpath = NULL; + bool failed; + void (*resultfn)(const char *msg, ...) = ksft_test_result_pass; + + void *copy = NULL, *how_copy = &how_ext; + + if (!openat2_supported) { + ksft_print_msg("openat2(2) unsupported\n"); + resultfn = ksft_test_result_skip; + goto skip; + } + + if (misalign) { + /* + * Explicitly misalign the structure copying it with the given + * (mis)alignment offset. 
The other data is set to be non-zero to + * make sure that non-zero bytes outside the struct aren't checked + * + * This is effectively to check that is_zeroed_user() works. + */ + copy = malloc(misalign + sizeof(how_ext)); + how_copy = copy + misalign; + memset(copy, 0xff, misalign); + memcpy(how_copy, &how_ext, sizeof(how_ext)); + } + + fd = raw_openat2(AT_FDCWD, ".", how_copy, test->size); + if (test->err >= 0) + failed = (fd < 0); + else + failed = (fd != test->err); + if (fd >= 0) { + fdpath = fdreadlink(fd); + close(fd); + } + + if (failed) { + resultfn = ksft_test_result_fail; + + ksft_print_msg("openat2 unexpectedly returned "); + if (fdpath) + ksft_print_msg("%d['%s']\n", fd, fdpath); + else + ksft_print_msg("%d (%s)\n", fd, strerror(-fd)); + } + +skip: + if (test->err >= 0) + resultfn("openat2 with %s argument [misalign=%d] succeeds\n", + test->name, misalign); + else + resultfn("openat2 with %s argument [misalign=%d] fails with %d (%s)\n", + test->name, misalign, test->err, + strerror(-test->err)); + + free(copy); + free(fdpath); + fflush(stdout); + } + } +} + +struct flag_test { + const char *name; + struct open_how how; + int err; +}; + +#define NUM_OPENAT2_FLAG_TESTS 23 + +void test_openat2_flags(void) +{ + struct flag_test tests[] = { + /* O_TMPFILE is incompatible with O_PATH and O_CREAT. */ + { .name = "incompatible flags (O_TMPFILE | O_PATH)", + .how.flags = O_TMPFILE | O_PATH | O_RDWR, .err = -EINVAL }, + { .name = "incompatible flags (O_TMPFILE | O_CREAT)", + .how.flags = O_TMPFILE | O_CREAT | O_RDWR, .err = -EINVAL }, + + /* O_PATH only permits certain other flags to be set ... */ + { .name = "compatible flags (O_PATH | O_CLOEXEC)", + .how.flags = O_PATH | O_CLOEXEC }, + { .name = "compatible flags (O_PATH | O_DIRECTORY)", + .how.flags = O_PATH | O_DIRECTORY }, + { .name = "compatible flags (O_PATH | O_NOFOLLOW)", + .how.flags = O_PATH | O_NOFOLLOW }, + /* ... and others are absolutely not permitted. 
*/ + { .name = "incompatible flags (O_PATH | O_RDWR)", + .how.flags = O_PATH | O_RDWR, .err = -EINVAL }, + { .name = "incompatible flags (O_PATH | O_CREAT)", + .how.flags = O_PATH | O_CREAT, .err = -EINVAL }, + { .name = "incompatible flags (O_PATH | O_EXCL)", + .how.flags = O_PATH | O_EXCL, .err = -EINVAL }, + { .name = "incompatible flags (O_PATH | O_NOCTTY)", + .how.flags = O_PATH | O_NOCTTY, .err = -EINVAL }, + { .name = "incompatible flags (O_PATH | O_DIRECT)", + .how.flags = O_PATH | O_DIRECT, .err = -EINVAL }, + { .name = "incompatible flags (O_PATH | O_LARGEFILE)", + .how.flags = O_PATH | O_LARGEFILE, .err = -EINVAL }, + + /* ->mode must only be set with O_{CREAT,TMPFILE}. */ + { .name = "non-zero how.mode and O_RDONLY", + .how.flags = O_RDONLY, .how.mode = 0600, .err = -EINVAL }, + { .name = "non-zero how.mode and O_PATH", + .how.flags = O_PATH, .how.mode = 0600, .err = -EINVAL }, + { .name = "valid how.mode and O_CREAT", + .how.flags = O_CREAT, .how.mode = 0600 }, + { .name = "valid how.mode and O_TMPFILE", + .how.flags = O_TMPFILE | O_RDWR, .how.mode = 0600 }, + /* ->mode must only contain 0777 bits. */ + { .name = "invalid how.mode and O_CREAT", + .how.flags = O_CREAT, + .how.mode = 0xFFFF, .err = -EINVAL }, + { .name = "invalid (very large) how.mode and O_CREAT", + .how.flags = O_CREAT, + .how.mode = 0xC000000000000000ULL, .err = -EINVAL }, + { .name = "invalid how.mode and O_TMPFILE", + .how.flags = O_TMPFILE | O_RDWR, + .how.mode = 0x1337, .err = -EINVAL }, + { .name = "invalid (very large) how.mode and O_TMPFILE", + .how.flags = O_TMPFILE | O_RDWR, + .how.mode = 0x0000A00000000000ULL, .err = -EINVAL }, + + /* ->resolve must only contain RESOLVE_* flags. 
*/ + { .name = "invalid how.resolve and O_RDONLY", + .how.flags = O_RDONLY, + .how.resolve = 0x1337, .err = -EINVAL }, + { .name = "invalid how.resolve and O_CREAT", + .how.flags = O_CREAT, + .how.resolve = 0x1337, .err = -EINVAL }, + { .name = "invalid how.resolve and O_TMPFILE", + .how.flags = O_TMPFILE | O_RDWR, + .how.resolve = 0x1337, .err = -EINVAL }, + { .name = "invalid how.resolve and O_PATH", + .how.flags = O_PATH, + .how.resolve = 0x1337, .err = -EINVAL }, + }; + + BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS); + + for (int i = 0; i < ARRAY_LEN(tests); i++) { + int fd, fdflags = -1; + char *path, *fdpath = NULL; + bool failed = false; + struct flag_test *test = &tests[i]; + void (*resultfn)(const char *msg, ...) = ksft_test_result_pass; + + if (!openat2_supported) { + ksft_print_msg("openat2(2) unsupported\n"); + resultfn = ksft_test_result_skip; + goto skip; + } + + path = (test->how.flags & O_CREAT) ? "/tmp/ksft.openat2_tmpfile" : "."; + unlink(path); + + fd = sys_openat2(AT_FDCWD, path, &test->how); + if (test->err >= 0) + failed = (fd < 0); + else + failed = (fd != test->err); + if (fd >= 0) { + int otherflags; + + fdpath = fdreadlink(fd); + fdflags = fcntl(fd, F_GETFL); + otherflags = fcntl(fd, F_GETFD); + close(fd); + + E_assert(fdflags >= 0, "fcntl F_GETFL of new fd"); + E_assert(otherflags >= 0, "fcntl F_GETFD of new fd"); + + /* O_CLOEXEC isn't shown in F_GETFL. */ + if (otherflags & FD_CLOEXEC) + fdflags |= O_CLOEXEC; + /* O_CREAT is hidden from F_GETFL. 
*/ + if (test->how.flags & O_CREAT) + fdflags |= O_CREAT; + if (!(test->how.flags & O_LARGEFILE)) + fdflags &= ~O_LARGEFILE; + failed |= (fdflags != test->how.flags); + } + + if (failed) { + resultfn = ksft_test_result_fail; + + ksft_print_msg("openat2 unexpectedly returned "); + if (fdpath) + ksft_print_msg("%d['%s'] with %X (!= %llX)\n", + fd, fdpath, fdflags, + (unsigned long long)test->how.flags); /* how.flags is 64-bit: needs %llX, not %X */ + else + ksft_print_msg("%d (%s)\n", fd, strerror(-fd)); + } + +skip: + if (test->err >= 0) + resultfn("openat2 with %s succeeds\n", test->name); + else + resultfn("openat2 with %s fails with %d (%s)\n", + test->name, test->err, strerror(-test->err)); + + free(fdpath); + fflush(stdout); + } +} + +#define NUM_TESTS (NUM_OPENAT2_STRUCT_VARIATIONS * NUM_OPENAT2_STRUCT_TESTS + \ + NUM_OPENAT2_FLAG_TESTS) + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(NUM_TESTS); + + test_openat2_struct(); + test_openat2_flags(); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/openat2/rename_attack_test.c b/tools/testing/selftests/openat2/rename_attack_test.c new file mode 100644 index 000000000000..0a770728b436 --- /dev/null +++ b/tools/testing/selftests/openat2/rename_attack_test.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <syscall.h> +#include <limits.h> +#include <unistd.h> + +#include "../kselftest.h" +#include "helpers.h" + +/* Construct a test directory with the following structure: + * + * root/ + * |-- a/ + * | `-- c/ + * `-- b/ + */ +int setup_testdir(void) +{ + int dfd; + char dirname[] = "/tmp/ksft-openat2-rename-attack.XXXXXX"; + + /* Make the top-level directory. */ + if (!mkdtemp(dirname)) + ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n"); + dfd = open(dirname, O_PATH | O_DIRECTORY); + if (dfd < 0) + ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n"); + + E_mkdirat(dfd, "a", 0755); + E_mkdirat(dfd, "b", 0755); + E_mkdirat(dfd, "a/c", 0755); + + return dfd; +} + +/* Swap @dirfd/@a and @dirfd/@b constantly. Parent must kill this process. */ +pid_t spawn_attack(int dirfd, char *a, char *b) +{ + pid_t child = fork(); + if (child != 0) + return child; + + /* If the parent (the test process) dies, kill ourselves too. */ + E_prctl(PR_SET_PDEATHSIG, SIGKILL); + + /* Swap @a and @b. */ + for (;;) + renameat2(dirfd, a, dirfd, b, RENAME_EXCHANGE); + exit(1); +} + +#define NUM_RENAME_TESTS 2 +#define ROUNDS 400000 + +const char *flagname(int resolve) +{ + switch (resolve) { + case RESOLVE_IN_ROOT: + return "RESOLVE_IN_ROOT"; + case RESOLVE_BENEATH: + return "RESOLVE_BENEATH"; + } + return "(unknown)"; +} + +void test_rename_attack(int resolve) +{ + int dfd, afd; + pid_t child; + void (*resultfn)(const char *msg, ...) 
= ksft_test_result_pass; + int escapes = 0, other_errs = 0, exdevs = 0, eagains = 0, successes = 0; + + struct open_how how = { + .flags = O_PATH, + .resolve = resolve, + }; + + if (!openat2_supported) { + how.resolve = 0; + ksft_print_msg("openat2(2) unsupported -- using openat(2) instead\n"); + } + + dfd = setup_testdir(); + afd = openat(dfd, "a", O_PATH); + if (afd < 0) + ksft_exit_fail_msg("test_rename_attack: failed to open 'a'\n"); + + child = spawn_attack(dfd, "a/c", "b"); + + for (int i = 0; i < ROUNDS; i++) { + int fd; + char *victim_path = "c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../.."; + + if (openat2_supported) + fd = sys_openat2(afd, victim_path, &how); + else + fd = sys_openat(afd, victim_path, &how); + + if (fd < 0) { + if (fd == -EAGAIN) + eagains++; + else if (fd == -EXDEV) + exdevs++; + else if (fd == -ENOENT) + escapes++; /* escaped outside and got ENOENT... */ + else + other_errs++; /* unexpected error */ + } else { + if (fdequal(fd, afd, NULL)) + successes++; + else + escapes++; /* we got an unexpected fd */ + } + close(fd); + } + + if (escapes > 0) + resultfn = ksft_test_result_fail; + ksft_print_msg("non-escapes: EAGAIN=%d EXDEV=%d E<other>=%d success=%d\n", + eagains, exdevs, other_errs, successes); + resultfn("rename attack with %s (%d runs, got %d escapes)\n", + flagname(resolve), ROUNDS, escapes); + + /* Should be killed anyway, but might as well make sure. 
*/ + E_kill(child, SIGKILL); +} + +#define NUM_TESTS NUM_RENAME_TESTS + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(NUM_TESTS); + + test_rename_attack(RESOLVE_BENEATH); + test_rename_attack(RESOLVE_IN_ROOT); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/openat2/resolve_test.c new file mode 100644 index 000000000000..7a94b1da8e7b --- /dev/null +++ b/tools/testing/selftests/openat2/resolve_test.c @@ -0,0 +1,523 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> + +#include "../kselftest.h" +#include "helpers.h" + +/* + * Construct a test directory with the following structure: + * + * root/ + * |-- procexe -> /proc/self/exe + * |-- procroot -> /proc/self/root + * |-- root/ + * |-- mnt/ [mountpoint] + * | |-- self -> ../mnt/ + * | `-- absself -> /mnt/ + * |-- etc/ + * | `-- passwd + * |-- creatlink -> /newfile3 + * |-- reletc -> etc/ + * |-- relsym -> etc/passwd + * |-- absetc -> /etc/ + * |-- abssym -> /etc/passwd + * |-- abscheeky -> /cheeky + * `-- cheeky/ + * |-- absself -> / + * |-- self -> ../../root/ + * |-- garbageself -> /../../root/ + * |-- passwd -> ../cheeky/../cheeky/../etc/../etc/passwd + * |-- abspasswd -> /../cheeky/../cheeky/../etc/../etc/passwd + * |-- dotdotlink -> ../../../../../../../../../../../../../../etc/passwd + * `-- garbagelink -> /../../../../../../../../../../../../../../etc/passwd + */ +int setup_testdir(void) +{ + int dfd, tmpfd; + char dirname[] = "/tmp/ksft-openat2-testdir.XXXXXX"; + + /* Unshare and make /tmp a new directory. 
*/ + E_unshare(CLONE_NEWNS); + E_mount("", "/tmp", "", MS_PRIVATE, ""); + + /* Make the top-level directory. */ + if (!mkdtemp(dirname)) + ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n"); + dfd = open(dirname, O_PATH | O_DIRECTORY); + if (dfd < 0) + ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n"); + + /* A sub-directory which is actually used for tests. */ + E_mkdirat(dfd, "root", 0755); + tmpfd = openat(dfd, "root", O_PATH | O_DIRECTORY); + if (tmpfd < 0) + ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n"); + close(dfd); + dfd = tmpfd; + + E_symlinkat("/proc/self/exe", dfd, "procexe"); + E_symlinkat("/proc/self/root", dfd, "procroot"); + E_mkdirat(dfd, "root", 0755); + + /* There is no mountat(2), so use chdir. */ + E_mkdirat(dfd, "mnt", 0755); + E_fchdir(dfd); + E_mount("tmpfs", "./mnt", "tmpfs", MS_NOSUID | MS_NODEV, ""); + E_symlinkat("../mnt/", dfd, "mnt/self"); + E_symlinkat("/mnt/", dfd, "mnt/absself"); + + E_mkdirat(dfd, "etc", 0755); + E_touchat(dfd, "etc/passwd"); + + E_symlinkat("/newfile3", dfd, "creatlink"); + E_symlinkat("etc/", dfd, "reletc"); + E_symlinkat("etc/passwd", dfd, "relsym"); + E_symlinkat("/etc/", dfd, "absetc"); + E_symlinkat("/etc/passwd", dfd, "abssym"); + E_symlinkat("/cheeky", dfd, "abscheeky"); + + E_mkdirat(dfd, "cheeky", 0755); + + E_symlinkat("/", dfd, "cheeky/absself"); + E_symlinkat("../../root/", dfd, "cheeky/self"); + E_symlinkat("/../../root/", dfd, "cheeky/garbageself"); + + E_symlinkat("../cheeky/../etc/../etc/passwd", dfd, "cheeky/passwd"); + E_symlinkat("/../cheeky/../etc/../etc/passwd", dfd, "cheeky/abspasswd"); + + E_symlinkat("../../../../../../../../../../../../../../etc/passwd", + dfd, "cheeky/dotdotlink"); + E_symlinkat("/../../../../../../../../../../../../../../etc/passwd", + dfd, "cheeky/garbagelink"); + + return dfd; +} + +struct basic_test { + const char *name; + const char *dir; + const char *path; + struct open_how how; + bool pass; + union { + int err; + const char 
*path; + } out; +}; + +#define NUM_OPENAT2_OPATH_TESTS 88 + +void test_openat2_opath_tests(void) +{ + int rootfd, hardcoded_fd; + char *procselfexe, *hardcoded_fdpath; + + E_asprintf(&procselfexe, "/proc/%d/exe", getpid()); + rootfd = setup_testdir(); + + hardcoded_fd = open("/dev/null", O_RDONLY); + E_assert(hardcoded_fd >= 0, "open fd to hardcode"); + E_asprintf(&hardcoded_fdpath, "self/fd/%d", hardcoded_fd); + + struct basic_test tests[] = { + /** RESOLVE_BENEATH **/ + /* Attempts to cross dirfd should be blocked. */ + { .name = "[beneath] jump to /", + .path = "/", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] absolute link to $root", + .path = "cheeky/absself", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] chained absolute links to $root", + .path = "abscheeky/absself", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] jump outside $root", + .path = "..", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] temporary jump outside $root", + .path = "../root/", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] symlink temporary jump outside $root", + .path = "cheeky/self", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] chained symlink temporary jump outside $root", + .path = "abscheeky/self", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] garbage links to $root", + .path = "cheeky/garbageself", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] chained garbage links to $root", + .path = "abscheeky/garbageself", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + /* Only relative paths that stay inside dirfd should work. 
*/ + { .name = "[beneath] ordinary path to 'root'", + .path = "root", .how.resolve = RESOLVE_BENEATH, + .out.path = "root", .pass = true }, + { .name = "[beneath] ordinary path to 'etc'", + .path = "etc", .how.resolve = RESOLVE_BENEATH, + .out.path = "etc", .pass = true }, + { .name = "[beneath] ordinary path to 'etc/passwd'", + .path = "etc/passwd", .how.resolve = RESOLVE_BENEATH, + .out.path = "etc/passwd", .pass = true }, + { .name = "[beneath] relative symlink inside $root", + .path = "relsym", .how.resolve = RESOLVE_BENEATH, + .out.path = "etc/passwd", .pass = true }, + { .name = "[beneath] chained-'..' relative symlink inside $root", + .path = "cheeky/passwd", .how.resolve = RESOLVE_BENEATH, + .out.path = "etc/passwd", .pass = true }, + { .name = "[beneath] absolute symlink component outside $root", + .path = "abscheeky/passwd", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] absolute symlink target outside $root", + .path = "abssym", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] absolute path outside $root", + .path = "/etc/passwd", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] cheeky absolute path outside $root", + .path = "cheeky/abspasswd", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] chained cheeky absolute path outside $root", + .path = "abscheeky/abspasswd", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + /* Tricky paths should fail. 
*/ + { .name = "[beneath] tricky '..'-chained symlink outside $root", + .path = "cheeky/dotdotlink", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] tricky absolute + '..'-chained symlink outside $root", + .path = "abscheeky/dotdotlink", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] tricky garbage link outside $root", + .path = "cheeky/garbagelink", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + { .name = "[beneath] tricky absolute + garbage link outside $root", + .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_BENEATH, + .out.err = -EXDEV, .pass = false }, + + /** RESOLVE_IN_ROOT **/ + /* All attempts to cross the dirfd will be scoped-to-root. */ + { .name = "[in_root] jump to /", + .path = "/", .how.resolve = RESOLVE_IN_ROOT, + .out.path = NULL, .pass = true }, + { .name = "[in_root] absolute symlink to /root", + .path = "cheeky/absself", .how.resolve = RESOLVE_IN_ROOT, + .out.path = NULL, .pass = true }, + { .name = "[in_root] chained absolute symlinks to /root", + .path = "abscheeky/absself", .how.resolve = RESOLVE_IN_ROOT, + .out.path = NULL, .pass = true }, + { .name = "[in_root] '..' at root", + .path = "..", .how.resolve = RESOLVE_IN_ROOT, + .out.path = NULL, .pass = true }, + { .name = "[in_root] '../root' at root", + .path = "../root/", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "root", .pass = true }, + { .name = "[in_root] relative symlink containing '..' 
above root", + .path = "cheeky/self", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "root", .pass = true }, + { .name = "[in_root] garbage link to /root", + .path = "cheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "root", .pass = true }, + { .name = "[in_root] chainged garbage links to /root", + .path = "abscheeky/garbageself", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "root", .pass = true }, + { .name = "[in_root] relative path to 'root'", + .path = "root", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "root", .pass = true }, + { .name = "[in_root] relative path to 'etc'", + .path = "etc", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc", .pass = true }, + { .name = "[in_root] relative path to 'etc/passwd'", + .path = "etc/passwd", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] relative symlink to 'etc/passwd'", + .path = "relsym", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] chained-'..' relative symlink to 'etc/passwd'", + .path = "cheeky/passwd", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] chained-'..' 
absolute + relative symlink to 'etc/passwd'", + .path = "abscheeky/passwd", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] absolute symlink to 'etc/passwd'", + .path = "abssym", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] absolute path 'etc/passwd'", + .path = "/etc/passwd", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] cheeky absolute path 'etc/passwd'", + .path = "cheeky/abspasswd", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] chained cheeky absolute path 'etc/passwd'", + .path = "abscheeky/abspasswd", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] tricky '..'-chained symlink outside $root", + .path = "cheeky/dotdotlink", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] tricky absolute + '..'-chained symlink outside $root", + .path = "abscheeky/dotdotlink", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] tricky absolute path + absolute + '..'-chained symlink outside $root", + .path = "/../../../../abscheeky/dotdotlink", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] tricky garbage link outside $root", + .path = "cheeky/garbagelink", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] tricky absolute + garbage link outside $root", + .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + { .name = "[in_root] tricky absolute path + absolute + garbage link outside $root", + .path = "/../../../../abscheeky/garbagelink", .how.resolve = RESOLVE_IN_ROOT, + .out.path = "etc/passwd", .pass = true }, + /* O_CREAT should handle trailing symlinks correctly. 
*/ + { .name = "[in_root] O_CREAT of relative path inside $root", + .path = "newfile1", .how.flags = O_CREAT, + .how.mode = 0700, + .how.resolve = RESOLVE_IN_ROOT, + .out.path = "newfile1", .pass = true }, + { .name = "[in_root] O_CREAT of absolute path", + .path = "/newfile2", .how.flags = O_CREAT, + .how.mode = 0700, + .how.resolve = RESOLVE_IN_ROOT, + .out.path = "newfile2", .pass = true }, + { .name = "[in_root] O_CREAT of tricky symlink outside root", + .path = "/creatlink", .how.flags = O_CREAT, + .how.mode = 0700, + .how.resolve = RESOLVE_IN_ROOT, + .out.path = "newfile3", .pass = true }, + + /** RESOLVE_NO_XDEV **/ + /* Crossing *down* into a mountpoint is disallowed. */ + { .name = "[no_xdev] cross into $mnt", + .path = "mnt", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] cross into $mnt/", + .path = "mnt/", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] cross into $mnt/.", + .path = "mnt/.", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + /* Crossing *up* out of a mountpoint is disallowed. */ + { .name = "[no_xdev] goto mountpoint root", + .dir = "mnt", .path = ".", .how.resolve = RESOLVE_NO_XDEV, + .out.path = "mnt", .pass = true }, + { .name = "[no_xdev] cross up through '..'", + .dir = "mnt", .path = "..", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] temporary cross up through '..'", + .dir = "mnt", .path = "../mnt", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] temporary relative symlink cross up", + .dir = "mnt", .path = "self", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] temporary absolute symlink cross up", + .dir = "mnt", .path = "absself", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + /* Jumping to "/" is ok, but later components cannot cross. 
*/ + { .name = "[no_xdev] jump to / directly", + .dir = "mnt", .path = "/", .how.resolve = RESOLVE_NO_XDEV, + .out.path = "/", .pass = true }, + { .name = "[no_xdev] jump to / (from /) directly", + .dir = "/", .path = "/", .how.resolve = RESOLVE_NO_XDEV, + .out.path = "/", .pass = true }, + { .name = "[no_xdev] jump to / then proc", + .path = "/proc/1", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] jump to / then tmp", + .path = "/tmp", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + /* Magic-links are blocked since they can switch vfsmounts. */ + { .name = "[no_xdev] cross through magic-link to self/root", + .dir = "/proc", .path = "self/root", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + { .name = "[no_xdev] cross through magic-link to self/cwd", + .dir = "/proc", .path = "self/cwd", .how.resolve = RESOLVE_NO_XDEV, + .out.err = -EXDEV, .pass = false }, + /* Except magic-link jumps inside the same vfsmount. */ + { .name = "[no_xdev] jump through magic-link to same procfs", + .dir = "/proc", .path = hardcoded_fdpath, .how.resolve = RESOLVE_NO_XDEV, + .out.path = "/proc", .pass = true, }, + + /** RESOLVE_NO_MAGICLINKS **/ + /* Regular symlinks should work. */ + { .name = "[no_magiclinks] ordinary relative symlink", + .path = "relsym", .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.path = "etc/passwd", .pass = true }, + /* Magic-links should not work. 
*/ + { .name = "[no_magiclinks] symlink to magic-link", + .path = "procexe", .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_magiclinks] normal path to magic-link", + .path = "/proc/self/exe", .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_magiclinks] normal path to magic-link with O_NOFOLLOW", + .path = "/proc/self/exe", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.path = procselfexe, .pass = true }, + { .name = "[no_magiclinks] symlink to magic-link path component", + .path = "procroot/etc", .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_magiclinks] magic-link path component", + .path = "/proc/self/root/etc", .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_magiclinks] magic-link path component with O_NOFOLLOW", + .path = "/proc/self/root/etc", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_MAGICLINKS, + .out.err = -ELOOP, .pass = false }, + + /** RESOLVE_NO_SYMLINKS **/ + /* Normal paths should work. */ + { .name = "[no_symlinks] ordinary path to '.'", + .path = ".", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = NULL, .pass = true }, + { .name = "[no_symlinks] ordinary path to 'root'", + .path = "root", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = "root", .pass = true }, + { .name = "[no_symlinks] ordinary path to 'etc'", + .path = "etc", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = "etc", .pass = true }, + { .name = "[no_symlinks] ordinary path to 'etc/passwd'", + .path = "etc/passwd", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = "etc/passwd", .pass = true }, + /* Regular symlinks are blocked. 
*/ + { .name = "[no_symlinks] relative symlink target", + .path = "relsym", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] relative symlink component", + .path = "reletc/passwd", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] absolute symlink target", + .path = "abssym", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] absolute symlink component", + .path = "absetc/passwd", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] cheeky garbage link", + .path = "cheeky/garbagelink", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] cheeky absolute + garbage link", + .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] cheeky absolute + absolute symlink", + .path = "abscheeky/absself", .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + /* Trailing symlinks with NO_FOLLOW. 
*/ + { .name = "[no_symlinks] relative symlink with O_NOFOLLOW", + .path = "relsym", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = "relsym", .pass = true }, + { .name = "[no_symlinks] absolute symlink with O_NOFOLLOW", + .path = "abssym", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = "abssym", .pass = true }, + { .name = "[no_symlinks] trailing symlink with O_NOFOLLOW", + .path = "cheeky/garbagelink", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_SYMLINKS, + .out.path = "cheeky/garbagelink", .pass = true }, + { .name = "[no_symlinks] multiple symlink components with O_NOFOLLOW", + .path = "abscheeky/absself", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + { .name = "[no_symlinks] multiple symlink (and garbage link) components with O_NOFOLLOW", + .path = "abscheeky/garbagelink", .how.flags = O_NOFOLLOW, + .how.resolve = RESOLVE_NO_SYMLINKS, + .out.err = -ELOOP, .pass = false }, + }; + + BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_OPATH_TESTS); + + for (int i = 0; i < ARRAY_LEN(tests); i++) { + int dfd, fd; + char *fdpath = NULL; + bool failed; + void (*resultfn)(const char *msg, ...) = ksft_test_result_pass; + struct basic_test *test = &tests[i]; + + if (!openat2_supported) { + ksft_print_msg("openat2(2) unsupported\n"); + resultfn = ksft_test_result_skip; + goto skip; + } + + /* Auto-set O_PATH. 
*/ + if (!(test->how.flags & O_CREAT)) + test->how.flags |= O_PATH; + + if (test->dir) + dfd = openat(rootfd, test->dir, O_PATH | O_DIRECTORY); + else + dfd = dup(rootfd); + /* openat()/dup() signal failure with -1, which is truthy: compare explicitly. */ + E_assert(dfd >= 0, "failed to openat root '%s': %m", + test->dir ?: "."); + + E_dup2(dfd, hardcoded_fd); + + fd = sys_openat2(dfd, test->path, &test->how); + if (test->pass) + failed = (fd < 0 || !fdequal(fd, rootfd, test->out.path)); + else + failed = (fd != test->out.err); + if (fd >= 0) { + fdpath = fdreadlink(fd); + close(fd); + } + close(dfd); + + if (failed) { + resultfn = ksft_test_result_fail; + + ksft_print_msg("openat2 unexpectedly returned "); + if (fdpath) + ksft_print_msg("%d['%s']\n", fd, fdpath); + else + ksft_print_msg("%d (%s)\n", fd, strerror(-fd)); + } + +skip: + if (test->pass) + resultfn("%s gives path '%s'\n", test->name, + test->out.path ?: "."); + else + resultfn("%s fails with %d (%s)\n", test->name, + test->out.err, strerror(-test->out.err)); + + fflush(stdout); + free(fdpath); + } + + free(procselfexe); + close(rootfd); + + free(hardcoded_fdpath); + close(hardcoded_fd); +} + +#define NUM_TESTS NUM_OPENAT2_OPATH_TESTS + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(NUM_TESTS); + + /* NOTE: We should be checking for CAP_SYS_ADMIN here... 
*/ + if (geteuid() != 0) + ksft_exit_skip("all tests require euid == 0\n"); + + test_openat2_opath_tests(); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 16d84d117bc0..3a779c084d96 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -1,2 +1,5 @@ pidfd_open_test +pidfd_poll_test pidfd_test +pidfd_wait +pidfd_getfd_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index 720b2d884b3c..75a545861375 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g -I../../../../usr/include/ -lpthread +CFLAGS += -g -I../../../../usr/include/ -pthread -TEST_GEN_PROGS := pidfd_test pidfd_open_test +TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 8452e910463f..d482515604db 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -16,6 +16,30 @@ #include "../kselftest.h" +#ifndef P_PIDFD +#define P_PIDFD 3 +#endif + +#ifndef CLONE_PIDFD +#define CLONE_PIDFD 0x00001000 +#endif + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +#ifndef __NR_pidfd_send_signal +#define __NR_pidfd_send_signal -1 +#endif + +#ifndef __NR_clone3 +#define __NR_clone3 -1 +#endif + +#ifndef __NR_pidfd_getfd +#define __NR_pidfd_getfd -1 +#endif + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. 
@@ -53,5 +77,20 @@ again: return WEXITSTATUS(status); } +static inline int sys_pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(__NR_pidfd_open, pid, flags); +} + +static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, + unsigned int flags) +{ + return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); +} + +static inline int sys_pidfd_getfd(int pidfd, int fd, int flags) +{ + return syscall(__NR_pidfd_getfd, pidfd, fd, flags); +} #endif /* __PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c new file mode 100644 index 000000000000..22558524f71c --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/types.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/wait.h> + +#include "pidfd.h" +#include "../kselftest.h" + +struct error { + int code; + char msg[512]; +}; + +static int error_set(struct error *err, int code, const char *fmt, ...) 
+{ + va_list args; + int r; + + if (code == PIDFD_PASS || !err || err->code != PIDFD_PASS) + return code; + + err->code = code; + va_start(args, fmt); + r = vsnprintf(err->msg, sizeof(err->msg), fmt, args); + assert((size_t)r < sizeof(err->msg)); + va_end(args); + + return code; +} + +static void error_report(struct error *err, const char *test_name) +{ + switch (err->code) { + case PIDFD_ERROR: + ksft_exit_fail_msg("%s test: Fatal: %s\n", test_name, err->msg); + break; + + case PIDFD_FAIL: + /* will be: not ok %d # error %s test: %s */ + ksft_test_result_error("%s test: %s\n", test_name, err->msg); + break; + + case PIDFD_SKIP: + /* will be: not ok %d # SKIP %s test: %s */ + ksft_test_result_skip("%s test: %s\n", test_name, err->msg); + break; + + case PIDFD_XFAIL: + ksft_test_result_pass("%s test: Expected failure: %s\n", + test_name, err->msg); + break; + + case PIDFD_PASS: + ksft_test_result_pass("%s test: Passed\n"); + break; + + default: + ksft_exit_fail_msg("%s test: Unknown code: %d %s\n", + test_name, err->code, err->msg); + break; + } +} + +static inline int error_check(struct error *err, const char *test_name) +{ + /* In case of error we bail out and terminate the test program */ + if (err->code == PIDFD_ERROR) + error_report(err, test_name); + + return err->code; +} + +struct child { + pid_t pid; + int fd; +}; + +static struct child clone_newns(int (*fn)(void *), void *args, + struct error *err) +{ + static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD; + size_t stack_size = 1024; + char *stack[1024] = { 0 }; + struct child ret; + + if (!(flags & CLONE_NEWUSER) && geteuid() != 0) + flags |= CLONE_NEWUSER; + +#ifdef __ia64__ + ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd); +#else + ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd); +#endif + + if (ret.pid < 0) { + error_set(err, PIDFD_ERROR, "clone failed (ret %d, errno %d)", + ret.fd, errno); + return ret; + } + + ksft_print_msg("New child: %d, fd: %d\n", 
ret.pid, ret.fd); + + return ret; +} + +static inline void child_close(struct child *child) +{ + close(child->fd); +} + +static inline int child_join(struct child *child, struct error *err) +{ + int r; + + r = wait_for_pid(child->pid); + if (r < 0) + error_set(err, PIDFD_ERROR, "waitpid failed (ret %d, errno %d)", + r, errno); + else if (r > 0) + error_set(err, r, "child %d reported: %d", child->pid, r); + + return r; +} + +static inline int child_join_close(struct child *child, struct error *err) +{ + child_close(child); + return child_join(child, err); +} + +static inline void trim_newline(char *str) +{ + char *pos = strrchr(str, '\n'); + + if (pos) + *pos = '\0'; +} + +static int verify_fdinfo(int pidfd, struct error *err, const char *prefix, + size_t prefix_len, const char *expect, ...) +{ + char buffer[512] = {0, }; + char path[512] = {0, }; + va_list args; + FILE *f; + char *line = NULL; + size_t n = 0; + int found = 0; + int r; + + va_start(args, expect); + r = vsnprintf(buffer, sizeof(buffer), expect, args); + assert((size_t)r < sizeof(buffer)); + va_end(args); + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd); + f = fopen(path, "re"); + if (!f) + return error_set(err, PIDFD_ERROR, "fdinfo open failed for %d", + pidfd); + + while (getline(&line, &n, f) != -1) { + char *val; + + if (strncmp(line, prefix, prefix_len)) + continue; + + found = 1; + + val = line + prefix_len; + r = strcmp(val, buffer); + if (r != 0) { + trim_newline(line); + trim_newline(buffer); + error_set(err, PIDFD_FAIL, "%s '%s' != '%s'", + prefix, val, buffer); + } + break; + } + + free(line); + fclose(f); + + if (found == 0) + return error_set(err, PIDFD_FAIL, "%s not found for fd %d", + prefix, pidfd); + + return PIDFD_PASS; +} + +static int child_fdinfo_nspid_test(void *args) +{ + struct error err; + int pidfd; + int r; + + /* if we got no fd for the sibling, we are done */ + if (!args) + return PIDFD_PASS; + + /* verify that we can not resolve the pidfd for a process + * 
in a sibling pid namespace, i.e. a pid namespace it is + * not in our or a descended namespace + */ + r = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (r < 0) { + ksft_print_msg("Failed to remount / private\n"); + return PIDFD_ERROR; + } + + (void)umount2("/proc", MNT_DETACH); + r = mount("proc", "/proc", "proc", 0, NULL); + if (r < 0) { + ksft_print_msg("Failed to remount /proc\n"); + return PIDFD_ERROR; + } + + pidfd = *(int *)args; + r = verify_fdinfo(pidfd, &err, "NSpid:", 6, "\t0\n"); + + if (r != PIDFD_PASS) + ksft_print_msg("NSpid fdinfo check failed: %s\n", err.msg); + + return r; +} + +static void test_pidfd_fdinfo_nspid(void) +{ + struct child a, b; + struct error err = {0, }; + const char *test_name = "pidfd check for NSpid in fdinfo"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); + + /* Pass the pidfd representing the first child to the + * second child, which will be in a sibling pid namespace, + * which means that the fdinfo NSpid entry for the pidfd + * should only contain '0'. + */ + b = clone_newns(child_fdinfo_nspid_test, &a.fd, &err); + error_check(&err, test_name); + + /* The children will have pid 1 in the new pid namespace, + * so the line must be 'NSPid:\t<pid>\t1'. + */ + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t%d\t%d\n", a.pid, 1); + verify_fdinfo(b.fd, &err, "NSpid:", 6, "\t%d\t%d\n", b.pid, 1); + + /* wait for the process, check the exit status and set + * 'err' accordingly, if it is not already set. 
+ */ + child_join_close(&a, &err); + child_join_close(&b, &err); + + error_report(&err, test_name); +} + +static void test_pidfd_dead_fdinfo(void) +{ + struct child a; + struct error err = {0, }; + const char *test_name = "pidfd check fdinfo for dead process"; + + /* Create a new child in a new pid and mount namespace */ + a = clone_newns(child_fdinfo_nspid_test, NULL, &err); + error_check(&err, test_name); + child_join(&a, &err); + + verify_fdinfo(a.fd, &err, "Pid:", 4, "\t-1\n"); + verify_fdinfo(a.fd, &err, "NSpid:", 6, "\t-1\n"); + child_close(&a); + error_report(&err, test_name); +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(2); + + test_pidfd_fdinfo_nspid(); + test_pidfd_dead_fdinfo(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c new file mode 100644 index 000000000000..401a7c1d0312 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> + +#include "pidfd.h" +#include "../kselftest.h" +#include "../kselftest_harness.h" + +/* + * UNKNOWN_FD is an fd number that should never exist in the child, as it is + * used to check the negative case. 
+ */ +#define UNKNOWN_FD 111 +#define UID_NOBODY 65535 + +static int sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, + unsigned long idx2) +{ + return syscall(__NR_kcmp, pid1, pid2, type, idx1, idx2); +} + +static int sys_memfd_create(const char *name, unsigned int flags) +{ + return syscall(__NR_memfd_create, name, flags); +} + +static int __child(int sk, int memfd) +{ + int ret; + char buf; + + /* + * Ensure we don't leave around a bunch of orphaned children if our + * tests fail. + */ + ret = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (ret) { + fprintf(stderr, "%s: Child could not set DEATHSIG\n", + strerror(errno)); + return -1; + } + + ret = send(sk, &memfd, sizeof(memfd), 0); + if (ret != sizeof(memfd)) { + fprintf(stderr, "%s: Child failed to send fd number\n", + strerror(errno)); + return -1; + } + + /* + * The fixture setup is completed at this point. The tests will run. + * + * This blocking recv enables the parent to message the child. + * Either we will read 'P' off of the sk, indicating that we need + * to disable ptrace, or we will read a 0, indicating that the other + * side has closed the sk. This occurs during fixture teardown time, + * indicating that the child should exit. 
+ */ + while ((ret = recv(sk, &buf, sizeof(buf), 0)) > 0) { + if (buf == 'P') { + ret = prctl(PR_SET_DUMPABLE, 0); + if (ret < 0) { + fprintf(stderr, + "%s: Child failed to disable ptrace\n", + strerror(errno)); + return -1; + } + } else { + fprintf(stderr, "Child received unknown command %c\n", + buf); + return -1; + } + ret = send(sk, &buf, sizeof(buf), 0); + if (ret != 1) { + fprintf(stderr, "%s: Child failed to ack\n", + strerror(errno)); + return -1; + } + } + if (ret < 0) { + fprintf(stderr, "%s: Child failed to read from socket\n", + strerror(errno)); + return -1; + } + + return 0; +} + +static int child(int sk) +{ + int memfd, ret; + + memfd = sys_memfd_create("test", 0); + if (memfd < 0) { + fprintf(stderr, "%s: Child could not create memfd\n", + strerror(errno)); + ret = -1; + } else { + ret = __child(sk, memfd); + close(memfd); + } + + close(sk); + return ret; +} + +FIXTURE(child) +{ + /* + * remote_fd is the number of the FD which we are trying to retrieve + * from the child. + */ + int remote_fd; + /* pid points to the child which we are fetching FDs from */ + pid_t pid; + /* pidfd is the pidfd of the child */ + int pidfd; + /* + * sk is our side of the socketpair used to communicate with the child. + * When it is closed, the child will exit. + */ + int sk; +}; + +FIXTURE_SETUP(child) +{ + int ret, sk_pair[2]; + + ASSERT_EQ(0, socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair)) { + TH_LOG("%s: failed to create socketpair", strerror(errno)); + } + self->sk = sk_pair[0]; + + self->pid = fork(); + ASSERT_GE(self->pid, 0); + + if (self->pid == 0) { + close(sk_pair[0]); + if (child(sk_pair[1])) + _exit(EXIT_FAILURE); + _exit(EXIT_SUCCESS); + } + + close(sk_pair[1]); + + self->pidfd = sys_pidfd_open(self->pid, 0); + ASSERT_GE(self->pidfd, 0); + + /* + * Wait for the child to complete setup. It'll send the remote memfd's + * number when ready. 
+ */ + ret = recv(sk_pair[0], &self->remote_fd, sizeof(self->remote_fd), 0); + ASSERT_EQ(sizeof(self->remote_fd), ret); +} + +FIXTURE_TEARDOWN(child) +{ + EXPECT_EQ(0, close(self->pidfd)); + EXPECT_EQ(0, close(self->sk)); + + EXPECT_EQ(0, wait_for_pid(self->pid)); +} + +TEST_F(child, disable_ptrace) +{ + int uid, fd; + char c; + + /* + * Turn into nobody if we're root, to avoid CAP_SYS_PTRACE + * + * The tests should run in their own process, so even this test fails, + * it shouldn't result in subsequent tests failing. + */ + uid = getuid(); + if (uid == 0) + ASSERT_EQ(0, seteuid(UID_NOBODY)); + + ASSERT_EQ(1, send(self->sk, "P", 1, 0)); + ASSERT_EQ(1, recv(self->sk, &c, 1, 0)); + + fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); + EXPECT_EQ(-1, fd); + EXPECT_EQ(EPERM, errno); + + if (uid == 0) + ASSERT_EQ(0, seteuid(0)); +} + +TEST_F(child, fetch_fd) +{ + int fd, ret; + + fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); + ASSERT_GE(fd, 0); + + EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + + ret = fcntl(fd, F_GETFD); + ASSERT_GE(ret, 0); + EXPECT_GE(ret & FD_CLOEXEC, 0); + + close(fd); +} + +TEST_F(child, test_unknown_fd) +{ + int fd; + + fd = sys_pidfd_getfd(self->pidfd, UNKNOWN_FD, 0); + EXPECT_EQ(-1, fd) { + TH_LOG("getfd succeeded while fetching unknown fd"); + }; + EXPECT_EQ(EBADF, errno) { + TH_LOG("%s: getfd did not get EBADF", strerror(errno)); + } +} + +TEST(flags_set) +{ + ASSERT_EQ(-1, sys_pidfd_getfd(0, 0, 1)); + EXPECT_EQ(errno, EINVAL); +} + +#if __NR_pidfd_getfd == -1 +int main(void) +{ + fprintf(stderr, "__NR_pidfd_getfd undefined. The pidfd_getfd syscall is unavailable. 
Test aborting\n"); + return KSFT_SKIP; +} +#else +TEST_HARNESS_MAIN +#endif diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index 0377133dd6dc..b9fe75fc3e51 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -22,11 +22,6 @@ #include "pidfd.h" #include "../kselftest.h" -static inline int sys_pidfd_open(pid_t pid, unsigned int flags) -{ - return syscall(__NR_pidfd_open, pid, flags); -} - static int safe_int(const char *numstr, int *converted) { char *err = NULL; diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c new file mode 100644 index 000000000000..4b115444dfe9 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/types.h> +#include <linux/wait.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "pidfd.h" +#include "../kselftest.h" + +static bool timeout; + +static void handle_alarm(int sig) +{ + timeout = true; +} + +int main(int argc, char **argv) +{ + struct pollfd fds; + int iter, nevents; + int nr_iterations = 10000; + + fds.events = POLLIN; + + if (argc > 2) + ksft_exit_fail_msg("Unexpected command line argument\n"); + + if (argc == 2) { + nr_iterations = atoi(argv[1]); + if (nr_iterations <= 0) + ksft_exit_fail_msg("invalid input parameter %s\n", + argv[1]); + } + + ksft_print_msg("running pidfd poll test for %d iterations\n", + nr_iterations); + + for (iter = 0; iter < nr_iterations; iter++) { + int pidfd; + int child_pid = fork(); + + if (child_pid < 0) { + if (errno == EAGAIN) { + iter--; + continue; + } + ksft_exit_fail_msg( + "%s - failed to fork a child process\n", + strerror(errno)); + 
} + + if (child_pid == 0) { + /* Child process just sleeps for a min and exits */ + sleep(60); + exit(EXIT_SUCCESS); + } + + /* Parent kills the child and waits for its death */ + pidfd = sys_pidfd_open(child_pid, 0); + if (pidfd < 0) + ksft_exit_fail_msg("%s - pidfd_open failed\n", + strerror(errno)); + + /* Setup 3 sec alarm - plenty of time */ + if (signal(SIGALRM, handle_alarm) == SIG_ERR) + ksft_exit_fail_msg("%s - signal failed\n", + strerror(errno)); + alarm(3); + + /* Send SIGKILL to the child */ + if (sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0)) + ksft_exit_fail_msg("%s - pidfd_send_signal failed\n", + strerror(errno)); + + /* Wait for the death notification */ + fds.fd = pidfd; + nevents = poll(&fds, 1, -1); + + /* Check for error conditions */ + if (nevents < 0) + ksft_exit_fail_msg("%s - poll failed\n", + strerror(errno)); + + if (nevents != 1) + ksft_exit_fail_msg("unexpected poll result: %d\n", + nevents); + + if (!(fds.revents & POLLIN)) + ksft_exit_fail_msg( + "unexpected event type received: 0x%x\n", + fds.revents); + + if (timeout) + ksft_exit_fail_msg( + "death notification wait timeout\n"); + + close(pidfd); + /* Wait for child to prevent zombies */ + if (waitpid(child_pid, NULL, 0) < 0) + ksft_exit_fail_msg("%s - waitpid failed\n", + strerror(errno)); + + } + + ksft_test_result_pass("pidfd poll test: pass\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index b632965e60eb..7aff2d3b42c0 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -21,20 +21,12 @@ #include "pidfd.h" #include "../kselftest.h" -#ifndef __NR_pidfd_send_signal -#define __NR_pidfd_send_signal -1 -#endif - #define str(s) _str(s) #define _str(s) #s #define CHILD_THREAD_MIN_WAIT 3 /* seconds */ #define MAX_EVENTS 5 -#ifndef CLONE_PIDFD -#define CLONE_PIDFD 0x00001000 -#endif - static pid_t pidfd_clone(int flags, int *pidfd, int 
(*fn)(void *)) { size_t stack_size = 1024; @@ -47,12 +39,6 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *)) #endif } -static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info, - unsigned int flags) -{ - return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags); -} - static int signal_received; static void set_signal_received_on_sigusr1(int sig) diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c new file mode 100644 index 000000000000..7079f8eef792 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE +#include <errno.h> +#include <linux/sched.h> +#include <linux/types.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sched.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "pidfd.h" +#include "../kselftest.h" + +#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) + +static pid_t sys_clone3(struct clone_args *args) +{ + return syscall(__NR_clone3, args, sizeof(struct clone_args)); +} + +static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, + struct rusage *ru) +{ + return syscall(__NR_waitid, which, pid, info, options, ru); +} + +static int test_pidfd_wait_simple(void) +{ + const char *test_name = "pidfd wait simple"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); + if (pidfd < 0) + ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n", + test_name, strerror(errno)); + + pid = 
sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + if (pid == 0) + ksft_exit_fail_msg( + "%s test: succeeded to wait on invalid pidfd %s\n", + test_name, strerror(errno)); + close(pidfd); + pidfd = -1; + + pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); + if (pidfd < 0) /* open() fails with -1; 0 is a valid fd */ + ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n", + test_name, strerror(errno)); + + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + if (pid == 0) + ksft_exit_fail_msg( + "%s test: succeeded to wait on invalid pidfd %s\n", + test_name, strerror(errno)); + close(pidfd); + pidfd = -1; + + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s\n", + test_name, strerror(errno)); + + if (pid == 0) + exit(EXIT_SUCCESS); + + pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + if (pid < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + + if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status)) + ksft_exit_fail_msg( + "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + close(pidfd); + + if (info.si_signo != SIGCHLD) + ksft_exit_fail_msg( + "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_signo, parent_tid, pidfd, + strerror(errno)); + + if (info.si_code != CLD_EXITED) + ksft_exit_fail_msg( + "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_code, parent_tid, pidfd, + strerror(errno)); + + if (info.si_pid != parent_tid) + ksft_exit_fail_msg( + "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_pid, parent_tid, pidfd, + strerror(errno)); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + 
+static int test_pidfd_wait_states(void) +{ + const char *test_name = "pidfd wait states"; + int pidfd = -1, status = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .pidfd = ptr_to_u64(&pidfd), + .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + pid = sys_clone3(&args); + if (pid < 0) + ksft_exit_fail_msg("%s test: failed to create new process %s\n", + test_name, strerror(errno)); + + if (pid == 0) { + kill(getpid(), SIGSTOP); + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } + + ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + + if (info.si_signo != SIGCHLD) + ksft_exit_fail_msg( + "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_signo, parent_tid, pidfd, + strerror(errno)); + + if (info.si_code != CLD_STOPPED) + ksft_exit_fail_msg( + "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_code, parent_tid, pidfd, + strerror(errno)); + + if (info.si_pid != parent_tid) + ksft_exit_fail_msg( + "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_pid, parent_tid, pidfd, + strerror(errno)); + + ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + + ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, 
strerror(errno)); + + if (info.si_signo != SIGCHLD) + ksft_exit_fail_msg( + "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_signo, parent_tid, pidfd, + strerror(errno)); + + if (info.si_code != CLD_CONTINUED) + ksft_exit_fail_msg( + "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_code, parent_tid, pidfd, + strerror(errno)); + + if (info.si_pid != parent_tid) + ksft_exit_fail_msg( + "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_pid, parent_tid, pidfd, + strerror(errno)); + + ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + + if (info.si_signo != SIGCHLD) + ksft_exit_fail_msg( + "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_signo, parent_tid, pidfd, + strerror(errno)); + + if (info.si_code != CLD_STOPPED) + ksft_exit_fail_msg( + "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_code, parent_tid, pidfd, + strerror(errno)); + + if (info.si_pid != parent_tid) + ksft_exit_fail_msg( + "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_pid, parent_tid, pidfd, + strerror(errno)); + + ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + if (ret < 0) + ksft_exit_fail_msg( + "%s test: failed to wait on WEXITED process 
with pid %d and pidfd %d: %s\n", + test_name, parent_tid, pidfd, strerror(errno)); + + if (info.si_signo != SIGCHLD) + ksft_exit_fail_msg( + "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_signo, parent_tid, pidfd, + strerror(errno)); + + if (info.si_code != CLD_KILLED) + ksft_exit_fail_msg( + "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_code, parent_tid, pidfd, + strerror(errno)); + + if (info.si_pid != parent_tid) + ksft_exit_fail_msg( + "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", + test_name, info.si_pid, parent_tid, pidfd, + strerror(errno)); + + close(pidfd); + + ksft_test_result_pass("%s test: Passed\n", test_name); + return 0; +} + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(2); + + test_pidfd_wait_simple(); + test_pidfd_wait_states(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index b3ad909aefbc..644770c3b754 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -26,6 +26,7 @@ SUB_DIRS = alignment \ switch_endian \ syscalls \ tm \ + eeh \ vphn \ math \ ptrace \ diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index ce12cd0e2967..12ef5b031974 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -1,13 +1,14 @@ copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 -copyuser_power7_t0 -copyuser_power7_t1 +copyuser_p7_t0 +copyuser_p7_t1 memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 -memcpy_power7_t0 -memcpy_power7_t1 +memcpy_p7_t0 +memcpy_p7_t1 copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 +memcpy_mcsafe_64 diff --git 
a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 44574f3818b3..0917983a1c78 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ - memcpy_p7_t0 memcpy_p7_t1 \ + memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -45,6 +45,11 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ -o $@ $^ +$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES) + $(CC) $(CPPFLAGS) $(CFLAGS) \ + -D COPY_LOOP=test_memcpy_mcsafe \ + -o $@ $^ + $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ copy_tofrom_user_reference.S stubs.S $(CC) $(CPPFLAGS) $(CFLAGS) \ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h index 05c1663c89b0..e6b80d5fbd14 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/export.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h @@ -1,3 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ #define EXPORT_SYMBOL(x) +#define EXPORT_SYMBOL_GPL(x) #define EXPORT_SYMBOL_KASAN(x) diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S new file mode 120000 index 000000000000..f0feef3062f6 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile new file mode 100644 index 000000000000..b397babd569b --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_PROGS := eeh-basic.sh +TEST_FILES := eeh-functions.sh + +top_srcdir = ../../../../.. +include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh new file mode 100755 index 000000000000..f988d2f42e8f --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +. ./eeh-functions.sh + +if ! eeh_supported ; then + echo "EEH not supported on this system, skipping" + exit 0; +fi + +if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ + [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then + echo "debugfs EEH testing files are missing. Is debugfs mounted?" + exit 1; +fi + +pre_lspci=`mktemp` +lspci > $pre_lspci + +# Bump the max freeze count to something absurd so we don't +# trip over it while breaking things. +echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes + +# record the devices that we break in here. Assuming everything +# goes to plan we should get them back once the recovery process +# is finished. +devices="" + +# Build up a list of candidate devices. +for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do + # skip bridges since we can't recover them (yet...) + if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then + echo "$dev, Skipped: bridge" + continue; + fi + + # Skip VFs for now since we don't have a reliable way + # to break them. + if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then + echo "$dev, Skipped: virtfn" + continue; + fi + + # Don't inject errors into an already-frozen PE. This happens with + # PEs that contain multiple PCI devices (e.g. 
multi-function cards) + # and injecting new errors during the recovery process will probably + # result in the recovery failing and the device being marked as + # failed. + if ! pe_ok $dev ; then + echo "$dev, Skipped: Bad initial PE state" + continue; + fi + + echo "$dev, Added" + + # Add to this list of device to check + devices="$devices $dev" +done + +dev_count="$(echo $devices | wc -w)" +echo "Found ${dev_count} breakable devices..." + +failed=0 +for dev in $devices ; do + echo "Breaking $dev..." + + if ! pe_ok $dev ; then + echo "Skipping $dev, Initial PE state is not ok" + failed="$((failed + 1))" + continue; + fi + + if ! eeh_one_dev $dev ; then + failed="$((failed + 1))" + fi +done + +echo "$failed devices failed to recover ($dev_count tested)" +lspci | diff -u $pre_lspci - +rm -f $pre_lspci + +exit $failed diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh new file mode 100755 index 000000000000..f52ed92b53e7 --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -0,0 +1,80 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +pe_ok() { + local dev="$1" + local path="/sys/bus/pci/devices/$dev/eeh_pe_state" + + if ! [ -e "$path" ] ; then + return 1; + fi + + local fw_state="$(cut -d' ' -f1 < $path)" + local sw_state="$(cut -d' ' -f2 < $path)" + + # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an + # error state or being recovered. Either way, not ok. + if [ "$((sw_state & 0x3))" -ne 0 ] ; then + return 1 + fi + + # A functioning PE should have the EEH_STATE_MMIO_ACTIVE and + # EEH_STATE_DMA_ACTIVE flags set. For some goddamn stupid reason + # the platform backends set these when the PE is in reset. The + # RECOVERING check above should stop any false positives though. 
+ if [ "$((fw_state & 0x18))" -ne "$((0x18))" ] ; then + return 1 + fi + + return 0; +} + +eeh_supported() { + test -e /proc/powerpc/eeh && \ + grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh +} + +eeh_one_dev() { + local dev="$1" + + # Using this function from the command line is sometimes useful for + # testing so check that the argument is a well-formed sysfs device + # name. + if ! test -e /sys/bus/pci/devices/$dev/ ; then + echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" + return 1; + fi + + # Break it + echo $dev >/sys/kernel/debug/powerpc/eeh_dev_break + + # Force an EEH device check. If the kernel has already + # noticed the EEH (due to a driver poll or whatever), this + # is a no-op. + echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check + + # Default to a 60s timeout when waiting for a device to recover. This + # is an arbitrary default which can be overridden by setting the + # EEH_MAX_WAIT environmental variable when required. + + # The current record holder for longest recovery time is: + # "Adaptec Series 8 12G SAS/PCIe 3" at 39 seconds + max_wait=${EEH_MAX_WAIT:=60} + + for i in `seq 0 ${max_wait}` ; do + if pe_ok $dev ; then + break; + fi + echo "$dev, waited $i/${max_wait}" + sleep 1 + done + + if ! pe_ok $dev ; then + echo "$dev, Failed to recover!" 
+ return 1; + fi + + echo "$dev, Recovered after $i seconds" + return 0; +} + diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 0e2b2e6284ac..e089a0c30d9a 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -34,6 +34,7 @@ int pick_online_cpu(void); int read_debugfs_file(char *debugfs_file, int *result); int write_debugfs_file(char *debugfs_file, int result); +int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); void set_dscr(unsigned long val); int perf_event_open_counter(unsigned int type, unsigned long config, int group_fd); diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 7101ffd08d66..0ebeaea22641 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -5,3 +5,4 @@ prot_sao segv_errors wild_bctr large_vm_fork_separation +bad_accesses diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index f1fbc15800c4..b9103c4bb414 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -3,7 +3,8 @@ noarg: $(MAKE) -C ../ TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ - large_vm_fork_separation + large_vm_fork_separation bad_accesses +TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile top_srcdir = ../../../../.. 
@@ -15,7 +16,9 @@ $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 +$(OUTPUT)/bad_accesses: CFLAGS += -m64 $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 +$(OUTPUT)/tlbie_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c new file mode 100644 index 000000000000..adc465f499ef --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Copyright 2019, Michael Ellerman, IBM Corp. +// +// Test that out-of-bounds reads/writes behave as expected. + +#include <setjmp.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "utils.h" + +// Old distros (Ubuntu 16.04 at least) don't define this +#ifndef SEGV_BNDERR +#define SEGV_BNDERR 3 +#endif + +// 64-bit kernel is always here +#define PAGE_OFFSET (0xcul << 60) + +static unsigned long kernel_virt_end; + +static volatile int fault_code; +static volatile unsigned long fault_addr; +static jmp_buf setjmp_env; + +static void segv_handler(int n, siginfo_t *info, void *ctxt_v) +{ + fault_code = info->si_code; + fault_addr = (unsigned long)info->si_addr; + siglongjmp(setjmp_env, 1); +} + +int bad_access(char *p, bool write) +{ + char x; + + fault_code = 0; + fault_addr = 0; + + if (sigsetjmp(setjmp_env, 1) == 0) { + if (write) + *p = 1; + else + x = *p; + + printf("Bad - no SEGV! (%c)\n", x); + return 1; + } + + // If we see MAPERR that means we took a page fault rather than an SLB + // miss. We only expect to take page faults for addresses within the + // valid kernel range. 
+ FAIL_IF(fault_code == SEGV_MAPERR && \ + (fault_addr < PAGE_OFFSET || fault_addr >= kernel_virt_end)); + + FAIL_IF(fault_code != SEGV_MAPERR && fault_code != SEGV_BNDERR); + + return 0; +} + +static int using_hash_mmu(bool *using_hash) +{ + char line[128]; + FILE *f; + int rc; + + f = fopen("/proc/cpuinfo", "r"); + FAIL_IF(!f); + + rc = 0; + while (fgets(line, sizeof(line), f) != NULL) { + if (strcmp(line, "MMU : Hash\n") == 0) { + *using_hash = true; + goto out; + } + + if (strcmp(line, "MMU : Radix\n") == 0) { + *using_hash = false; + goto out; + } + } + + rc = -1; +out: + fclose(f); + return rc; +} + +static int test(void) +{ + unsigned long i, j, addr, region_shift, page_shift, page_size; + struct sigaction sig; + bool hash_mmu; + + sig = (struct sigaction) { + .sa_sigaction = segv_handler, + .sa_flags = SA_SIGINFO, + }; + + FAIL_IF(sigaction(SIGSEGV, &sig, NULL) != 0); + + FAIL_IF(using_hash_mmu(&hash_mmu)); + + page_size = sysconf(_SC_PAGESIZE); + if (page_size == (64 * 1024)) + page_shift = 16; + else + page_shift = 12; + + if (page_size == (64 * 1024) || !hash_mmu) { + region_shift = 52; + + // We have 7 512T regions (4 kernel linear, vmalloc, io, vmemmap) + kernel_virt_end = PAGE_OFFSET + (7 * (512ul << 40)); + } else if (page_size == (4 * 1024) && hash_mmu) { + region_shift = 46; + + // We have 7 64T regions (4 kernel linear, vmalloc, io, vmemmap) + kernel_virt_end = PAGE_OFFSET + (7 * (64ul << 40)); + } else + FAIL_IF(true); + + printf("Using %s MMU, PAGE_SIZE = %dKB start address 0x%016lx\n", + hash_mmu ? "hash" : "radix", + (1 << page_shift) >> 10, + 1ul << region_shift); + + // This generates access patterns like: + // 0x0010000000000000 + // 0x0010000000010000 + // 0x0010000000020000 + // ... + // 0x0014000000000000 + // 0x0018000000000000 + // 0x0020000000000000 + // 0x0020000000010000 + // 0x0020000000020000 + // ... 
+ // 0xf400000000000000 + // 0xf800000000000000 + + for (i = 1; i <= ((0xful << 60) >> region_shift); i++) { + for (j = page_shift - 1; j < 60; j++) { + unsigned long base, delta; + + base = i << region_shift; + delta = 1ul << j; + + if (delta >= base) + break; + + addr = (base | delta) & ~((1 << page_shift) - 1); + + FAIL_IF(bad_access((char *)addr, false)); + FAIL_IF(bad_access((char *)addr, true)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(test, "bad_accesses"); +} diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c new file mode 100644 index 000000000000..f85a0938ab25 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp. + */ + +/* + * + * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store + * sequence in a loop. The same threads also run a context switch task + * that does sched_yield() in a loop. + * + * The snapshot thread marks the mmap area PROT_READ in between, makes a copy + * and copies it back to the original area. This helps us to detect if any + * store continued to happen after we marked the memory PROT_READ. 
+ */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <linux/futex.h> +#include <unistd.h> +#include <asm/unistd.h> +#include <string.h> +#include <stdlib.h> +#include <fcntl.h> +#include <sched.h> +#include <time.h> +#include <stdarg.h> +#include <sched.h> +#include <pthread.h> +#include <signal.h> +#include <sys/prctl.h> + +static inline void dcbf(volatile unsigned int *addr) +{ + __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory"); +} + +static void err_msg(char *msg) +{ + + time_t now; + time(&now); + printf("=================================\n"); + printf(" Error: %s\n", msg); + printf(" %s", ctime(&now)); + printf("=================================\n"); + exit(1); +} + +static char *map1; +static char *map2; +static pid_t rim_process_pid; + +/* + * A "rim-sequence" is defined to be the sequence of the following + * operations performed on a memory word: + * 1) FLUSH the contents of that word. + * 2) LOAD the contents of that word. + * 3) COMPARE the contents of that word with the content that was + * previously stored at that word + * 4) STORE new content into that word. + * + * The threads in this test that perform the rim-sequence are termed + * as rim_threads. + */ + +/* + * A "corruption" is defined to be the failed COMPARE operation in a + * rim-sequence. + * + * A rim_thread that detects a corruption informs about it to all the + * other rim_threads, and the mem_snapshot thread. + */ +static volatile unsigned int corruption_found; + +/* + * This defines the maximum number of rim_threads in this test. + * + * The THREAD_ID_BITS denote the number of bits required + * to represent the thread_ids [0..MAX_THREADS - 1]. + * We are being a bit paranoid here and set it to 8 bits, + * though 6 bits suffice. 
+ * + */ +#define MAX_THREADS 64 +#define THREAD_ID_BITS 8 +#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1) +static unsigned int rim_thread_ids[MAX_THREADS]; +static pthread_t rim_threads[MAX_THREADS]; + + +/* + * Each rim_thread works on an exclusive "chunk" of size + * RIM_CHUNK_SIZE. + * + * The ith rim_thread works on the ith chunk. + * + * The ith chunk begins at + * map1 + (i * RIM_CHUNK_SIZE) + */ +#define RIM_CHUNK_SIZE 1024 +#define BITS_PER_BYTE 8 +#define WORD_SIZE (sizeof(unsigned int)) +#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE) +#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE) + +static inline char *compute_chunk_start_addr(unsigned int thread_id) +{ + char *chunk_start; + + chunk_start = (char *)((unsigned long)map1 + + (thread_id * RIM_CHUNK_SIZE)); + + return chunk_start; +} + +/* + * The "word-offset" of a word-aligned address inside a chunk, is + * defined to be the number of words that precede the address in that + * chunk. + * + * WORD_OFFSET_BITS denote the number of bits required to represent + * the word-offsets of all the word-aligned addresses of a chunk. + */ +#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK)) +#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1) + +static inline unsigned int compute_word_offset(char *start, unsigned int *addr) +{ + unsigned int delta_bytes, ret; + delta_bytes = (unsigned long)addr - (unsigned long)start; + + ret = delta_bytes/WORD_SIZE; + + return ret; +} + +/* + * A "sweep" is defined to be the sequential execution of the + * rim-sequence by a rim_thread on its chunk one word at a time, + * starting from the first word of its chunk and ending with the last + * word of its chunk. + * + * Each sweep of a rim_thread is uniquely identified by a sweep_id. + * SWEEP_ID_BITS denote the number of bits required to represent + * the sweep_ids of rim_threads. 
+ * + * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS, + * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below. + */ +#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS)) +#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1) + +/* + * A "store-pattern" is the word-pattern that is stored into a word + * location in the 4)STORE step of the rim-sequence. + * + * In the store-pattern, we shall encode: + * + * - The thread-id of the rim_thread performing the store + * (The most significant THREAD_ID_BITS) + * + * - The word-offset of the address into which the store is being + * performed (The next WORD_OFFSET_BITS) + * + * - The sweep_id of the current sweep in which the store is + * being performed. (The lower SWEEP_ID_BITS) + * + * Store Pattern: 32 bits + * |------------------|--------------------|---------------------------------| + * | Thread id | Word offset | sweep_id | + * |------------------|--------------------|---------------------------------| + * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS + * + * In the store pattern, the (Thread-id + Word-offset) uniquely identify the + * address to which the store is being performed i.e, + * address == map1 + + * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE) + * + * And the sweep_id in the store pattern identifies the time when the + * store was performed by the rim_thread. + * + * We shall use this property in the 3)COMPARE step of the + * rim-sequence. 
+ */ +#define SWEEP_ID_SHIFT 0 +#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS) +#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS) + +/* + * Compute the store pattern for a given thread with id @tid, at + * location @addr in the sweep identified by @sweep_id + */ +static inline unsigned int compute_store_pattern(unsigned int tid, + unsigned int *addr, + unsigned int sweep_id) +{ + unsigned int ret = 0; + char *start = compute_chunk_start_addr(tid); + unsigned int word_offset = compute_word_offset(start, addr); + + ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT; + ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT; + ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT; + return ret; +} + +/* Extract the thread-id from the given store-pattern */ +static inline unsigned int extract_tid(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK; + return ret; +} + +/* Extract the word-offset from the given store-pattern */ +static inline unsigned int extract_word_offset(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK; + + return ret; +} + +/* Extract the sweep-id from the given store-pattern */ +static inline unsigned int extract_sweep_id(unsigned int pattern) + +{ + unsigned int ret; + + ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK; + + return ret; +} + +/************************************************************ + * * + * Logging the output of the verification * + * * + ************************************************************/ +#define LOGDIR_NAME_SIZE 100 +static char logdir[LOGDIR_NAME_SIZE]; + +static FILE *fp[MAX_THREADS]; +static const char logfilename[] ="Thread-%02d-Chunk"; + +static inline void start_verification_log(unsigned int tid, + unsigned int *addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + FILE *f; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[2] = "/"; + char *chunk_start = 
compute_chunk_start_addr(tid); + unsigned int size = RIM_CHUNK_SIZE; + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + f = fopen(path, "w"); + + if (!f) { + err_msg("Unable to create logfile\n"); + } + + fp[tid] = f; + + fprintf(f, "----------------------------------------------------------\n"); + fprintf(f, "PID = %d\n", rim_process_pid); + fprintf(f, "Thread id = %02d\n", tid); + fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start); + fprintf(f, "Chunk Size = %d\n", size); + fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr); + fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id); + fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void log_anamoly(unsigned int tid, unsigned int *addr, + unsigned int expected, unsigned int observed) +{ + FILE *f = fp[tid]; + + fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n", + tid, (unsigned long)addr, expected, observed); + fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected)); + fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed)); + fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected)); + fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed)); + fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected)); + fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed)); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) +{ + FILE *f = fp[tid]; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[] = "/"; + + fclose(f); + + if (nr_anamolies == 0) { + remove(path); + return; + } + + 
sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", + tid, nr_anamolies, path); +} + +/* + * When a COMPARE step of a rim-sequence fails, the rim_thread informs + * everyone else via the shared_memory pointed to by + * corruption_found variable. On seeing this, every thread verifies the + * content of its chunk as follows. + * + * Suppose a thread identified with @tid was about to store (but not + * yet stored) to @next_store_addr in its current sweep identified + * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id. + * + * This implies that for all the addresses @addr < @next_store_addr, + * Thread @tid has already performed a store as part of its current + * sweep. Hence we expect the content of such @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | cur_sweep_id | + * |-------------------------------------------------| + * + * Since Thread @tid is yet to perform stores on address + * @next_store_addr and above, we expect the content of such an + * address @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | prev_sweep_id | + * |-------------------------------------------------| + * + * The verifier function @verify_chunk does this verification and logs + * any anamolies that it finds. 
+ */ +static void verify_chunk(unsigned int tid, unsigned int *next_store_addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + unsigned int *iter_ptr; + unsigned int size = RIM_CHUNK_SIZE; + unsigned int expected; + unsigned int observed; + char *chunk_start = compute_chunk_start_addr(tid); + + int nr_anamolies = 0; + + start_verification_log(tid, next_store_addr, + cur_sweep_id, prev_sweep_id); + + for (iter_ptr = (unsigned int *)chunk_start; + (unsigned long)iter_ptr < (unsigned long)chunk_start + size; + iter_ptr++) { + unsigned int expected_sweep_id; + + if (iter_ptr < next_store_addr) { + expected_sweep_id = cur_sweep_id; + } else { + expected_sweep_id = prev_sweep_id; + } + + expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id); + + dcbf((volatile unsigned int*)iter_ptr); //Flush before reading + observed = *iter_ptr; + + if (observed != expected) { + nr_anamolies++; + log_anamoly(tid, iter_ptr, expected, observed); + } + } + + end_verification_log(tid, nr_anamolies); +} + +static void set_pthread_cpu(pthread_t th, int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) { + /* haven't reproduced with this setting, it kills random preemption which may be a factor */ + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static void set_mycpu(int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) { + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static volatile int segv_wait; + +static void segv_handler(int signo, siginfo_t *info, void *extra) +{ + while 
(segv_wait) { + sched_yield(); + } + +} + +static void set_segv_handler(void) +{ + struct sigaction sa; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = segv_handler; + + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } +} + +int timeout = 0; +/* + * This function is executed by every rim_thread. + * + * This function performs sweeps over the exclusive chunks of the + * rim_threads executing the rim-sequence one word at a time. + */ +static void *rim_fn(void *arg) +{ + unsigned int tid = *((unsigned int *)arg); + + int size = RIM_CHUNK_SIZE; + char *chunk_start = compute_chunk_start_addr(tid); + + unsigned int prev_sweep_id; + unsigned int cur_sweep_id = 0; + + /* word access */ + unsigned int pattern = cur_sweep_id; + unsigned int *pattern_ptr = &pattern; + unsigned int *w_ptr, read_data; + + set_segv_handler(); + + /* + * Let us initialize the chunk: + * + * Each word-aligned address addr in the chunk, + * is initialized to : + * |-------------------------------------------------| + * | tid | word_offset(addr) | 0 | + * |-------------------------------------------------| + */ + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + *w_ptr = *pattern_ptr; + } + + while (!corruption_found && !timeout) { + prev_sweep_id = cur_sweep_id; + cur_sweep_id = cur_sweep_id + 1; + + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + unsigned int old_pattern; + + /* + * Compute the pattern that we would have + * stored at this location in the previous + * sweep. 
+ */ + old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id); + + /* + * FLUSH:Ensure that we flush the contents of + * the cache before loading + */ + dcbf((volatile unsigned int*)w_ptr); //Flush + + /* LOAD: Read the value */ + read_data = *w_ptr; //Load + + /* + * COMPARE: Is it the same as what we had stored + * in the previous sweep ? It better be! + */ + if (read_data != old_pattern) { + /* No it isn't! Tell everyone */ + corruption_found = 1; + } + + /* + * Before performing a store, let us check if + * any rim_thread has found a corruption. + */ + if (corruption_found || timeout) { + /* + * Yes. Someone (including us!) has found + * a corruption :( + * + * Let us verify that our chunk is + * correct. + */ + /* But first, let us allow the dust to settle down! */ + verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id); + + return 0; + } + + /* + * Compute the new pattern that we are going + * to write to this location + */ + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + + /* + * STORE: Now let us write this pattern into + * the location + */ + *w_ptr = *pattern_ptr; + } + } + + return NULL; +} + + +static unsigned long start_cpu = 0; +static unsigned long nrthreads = 4; + +static pthread_t mem_snapshot_thread; + +static void *mem_snapshot_fn(void *arg) +{ + int page_size = getpagesize(); + size_t size = page_size; + void *tmp = malloc(size); + + while (!corruption_found && !timeout) { + /* Stop memory migration once corruption is found */ + segv_wait = 1; + + mprotect(map1, size, PROT_READ); + + /* + * Load from the working alias (map1). Loading from map2 + * also fails. + */ + memcpy(tmp, map1, size); + + /* + * Stores must go via map2 which has write permissions, but + * the corrupted data tends to be seen in the snapshot buffer, + * so corruption does not appear to be introduced at the + * copy-back via map2 alias here. 
+ */ + memcpy(map2, tmp, size); + /* + * Before releasing other threads, must ensure the copy + * back to + */ + asm volatile("sync" ::: "memory"); + mprotect(map1, size, PROT_READ|PROT_WRITE); + asm volatile("sync" ::: "memory"); + segv_wait = 0; + + usleep(1); /* This value makes a big difference */ + } + + return 0; +} + +void alrm_sighandler(int sig) +{ + timeout = 1; +} + +int main(int argc, char *argv[]) +{ + int c; + int page_size = getpagesize(); + time_t now; + int i, dir_error; + pthread_attr_t attr; + key_t shm_key = (key_t) getpid(); + int shmid, run_time = 20 * 60; + struct sigaction sa_alrm; + + snprintf(logdir, LOGDIR_NAME_SIZE, + "/tmp/logdir-%u", (unsigned int)getpid()); + while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) { + switch(c) { + case 'r': + start_cpu = strtoul(optarg, NULL, 10); + break; + case 'h': + printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]); + exit(0); + break; + case 'n': + nrthreads = strtoul(optarg, NULL, 10); + break; + case 'l': + strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1); + break; + case 't': + run_time = strtoul(optarg, NULL, 10); + break; + default: + printf("invalid option\n"); + exit(0); + break; + } + } + + if (nrthreads > MAX_THREADS) + nrthreads = MAX_THREADS; + + shmid = shmget(shm_key, page_size, IPC_CREAT|0666); + if (shmid < 0) { + err_msg("Failed shmget\n"); + } + + map1 = shmat(shmid, NULL, 0); + if (map1 == (void *) -1) { + err_msg("Failed shmat"); + } + + map2 = shmat(shmid, NULL, 0); + if (map2 == (void *) -1) { + err_msg("Failed shmat"); + } + + dir_error = mkdir(logdir, 0755); + + if (dir_error) { + err_msg("Failed mkdir"); + } + + printf("start_cpu list:%lu\n", start_cpu); + printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads); + printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2); + printf("logdir at : %s\n", logdir); + printf("Timeout: %d seconds\n", run_time); + + time(&now); + 
printf("=================================\n"); + printf(" Starting Test\n"); + printf(" %s", ctime(&now)); + printf("=================================\n"); + + for (i = 0; i < nrthreads; i++) { + if (1 && !fork()) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + set_mycpu(start_cpu + i); + for (;;) + sched_yield(); + exit(0); + } + } + + + sa_alrm.sa_handler = &alrm_sighandler; + sigemptyset(&sa_alrm.sa_mask); + sa_alrm.sa_flags = 0; + + if (sigaction(SIGALRM, &sa_alrm, 0) == -1) { + err_msg("Failed signal handler registration\n"); + } + + alarm(run_time); + + pthread_attr_init(&attr); + for (i = 0; i < nrthreads; i++) { + rim_thread_ids[i] = i; + pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]); + set_pthread_cpu(rim_threads[i], start_cpu + i); + } + + pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1); + set_pthread_cpu(mem_snapshot_thread, start_cpu + i); + + + pthread_join(mem_snapshot_thread, NULL); + for (i = 0; i < nrthreads; i++) { + pthread_join(rim_threads[i], NULL); + } + + if (!timeout) { + time(&now); + printf("=================================\n"); + printf(" Data Corruption Detected\n"); + printf(" %s", ctime(&now)); + printf(" See logfiles in %s\n", logdir); + printf("=================================\n"); + return 1; + } + return 0; +} diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 23f4caf48ffc..417306353e07 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +include ../../../../../../scripts/Kbuild.include + noarg: $(MAKE) -C ../../ @@ -6,7 +8,10 @@ noarg: CFLAGS += -m64 # Toolchains may build PIE by default which breaks the assembly -LDFLAGS += -no-pie +no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie) + +LDFLAGS += 
$(no-pie-option) TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore index 07ec449a2767..dce19f221c46 100644 --- a/tools/testing/selftests/powerpc/ptrace/.gitignore +++ b/tools/testing/selftests/powerpc/ptrace/.gitignore @@ -10,3 +10,6 @@ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak perf-hwbreak +core-pkey +ptrace-pkey +ptrace-syscall diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index 200337daec42..c1f324afdbf3 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -148,6 +148,121 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest) return 0; } +static int runtest_dar_outside(void) +{ + void *target; + volatile __u16 temp16; + volatile __u64 temp64; + struct perf_event_attr attr; + int break_fd; + unsigned long long breaks; + int fail = 0; + size_t res; + + target = malloc(8); + if (!target) { + perror("malloc failed"); + exit(EXIT_FAILURE); + } + + /* setup counters */ + memset(&attr, 0, sizeof(attr)); + attr.disabled = 1; + attr.type = PERF_TYPE_BREAKPOINT; + attr.exclude_kernel = 1; + attr.exclude_hv = 1; + attr.exclude_guest = 1; + attr.bp_type = HW_BREAKPOINT_RW; + /* watch middle half of target array */ + attr.bp_addr = (__u64)(target + 2); + attr.bp_len = 4; + break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + if (break_fd < 0) { + free(target); + perror("sys_perf_event_open"); + exit(EXIT_FAILURE); + } + + /* Shouldn't hit. 
*/ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)target); + *((__u16 *)target) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 0) { + printf("TESTED: No overlap\n"); + } else { + printf("FAILED: No overlap: %lld != 0\n", breaks); + fail = 1; + } + + /* Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)(target + 1)); + *((__u16 *)(target + 1)) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 2) { + printf("TESTED: Partial overlap\n"); + } else { + printf("FAILED: Partial overlap: %lld != 2\n", breaks); + fail = 1; + } + + /* Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)(target + 5)); + *((__u16 *)(target + 5)) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 2) { + printf("TESTED: Partial overlap\n"); + } else { + printf("FAILED: Partial overlap: %lld != 2\n", breaks); + fail = 1; + } + + /* Shouldn't Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp16 = *((__u16 *)(target + 6)); + *((__u16 *)(target + 6)) = temp16; + ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 0) { + printf("TESTED: No overlap\n"); + } else { + printf("FAILED: No overlap: %lld != 0\n", breaks); + fail = 1; + } + + /* Hit */ + ioctl(break_fd, PERF_EVENT_IOC_RESET); + ioctl(break_fd, PERF_EVENT_IOC_ENABLE); + temp64 = *((__u64 *)target); + *((__u64 *)target) = temp64; + 
ioctl(break_fd, PERF_EVENT_IOC_DISABLE); + res = read(break_fd, &breaks, sizeof(unsigned long long)); + assert(res == sizeof(unsigned long long)); + if (breaks == 2) { + printf("TESTED: Full overlap\n"); + } else { + printf("FAILED: Full overlap: %lld != 2\n", breaks); + fail = 1; + } + + free(target); + close(break_fd); + return fail; +} + static int runtest(void) { int rwflag; @@ -172,7 +287,9 @@ static int runtest(void) return ret; } } - return 0; + + ret = runtest_dar_outside(); + return ret; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 3066d310f32b..fc477dfe86a2 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -22,321 +22,485 @@ #include <sys/wait.h> #include "ptrace.h" -/* Breakpoint access modes */ -enum { - BP_X = 1, - BP_RW = 2, - BP_W = 4, -}; - -static pid_t child_pid; -static struct ppc_debug_info dbginfo; - -static void get_dbginfo(void) -{ - int ret; +#define SPRN_PVR 0x11F +#define PVR_8xx 0x00500000 - ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); - if (ret) { - perror("Can't get breakpoint info\n"); - exit(-1); - } -} +bool is_8xx; -static bool hwbreak_present(void) -{ - return (dbginfo.num_data_bps != 0); -} +/* + * Use volatile on all global var so that compiler doesn't + * optimise their load/stores. Otherwise selftest can fail. 
+ */ +static volatile __u64 glvar; -static bool dawr_present(void) -{ - return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); -} +#define DAWR_MAX_LEN 512 +static volatile __u8 big_var[DAWR_MAX_LEN] __attribute__((aligned(512))); -static void set_breakpoint_addr(void *addr) -{ - int ret; +#define A_LEN 6 +#define B_LEN 6 +struct gstruct { + __u8 a[A_LEN]; /* double word aligned */ + __u8 b[B_LEN]; /* double word unaligned */ +}; +static volatile struct gstruct gstruct __attribute__((aligned(512))); - ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr); - if (ret) { - perror("Can't set breakpoint addr\n"); - exit(-1); - } -} -static int set_hwbreakpoint_addr(void *addr, int range) +static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) { - int ret; - - struct ppc_hw_breakpoint info; - - info.version = 1; - info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW; - info.addr_mode = PPC_BREAKPOINT_MODE_EXACT; - if (range > 0) - info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; - info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - info.addr = (__u64)addr; - info.addr2 = (__u64)addr + range; - info.condition_value = 0; - - ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); - if (ret < 0) { - perror("Can't set breakpoint\n"); + if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { + perror("Can't get breakpoint info"); exit(-1); } - return ret; } -static int del_hwbreakpoint_addr(int watchpoint_handle) +static bool dawr_present(struct ppc_debug_info *dbginfo) { - int ret; - - ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle); - if (ret < 0) { - perror("Can't delete hw breakpoint\n"); - exit(-1); - } - return ret; + return !!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_DAWR); } -#define DAWR_LENGTH_MAX 512 - -/* Dummy variables to test read/write accesses */ -static unsigned long long - dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)] - __attribute__((aligned(512))); -static unsigned long long 
*dummy_var = dummy_array; - static void write_var(int len) { - long long *plval; - char *pcval; - short *psval; - int *pival; + __u8 *pcvar; + __u16 *psvar; + __u32 *pivar; + __u64 *plvar; switch (len) { case 1: - pcval = (char *)dummy_var; - *pcval = 0xff; + pcvar = (__u8 *)&glvar; + *pcvar = 0xff; break; case 2: - psval = (short *)dummy_var; - *psval = 0xffff; + psvar = (__u16 *)&glvar; + *psvar = 0xffff; break; case 4: - pival = (int *)dummy_var; - *pival = 0xffffffff; + pivar = (__u32 *)&glvar; + *pivar = 0xffffffff; break; case 8: - plval = (long long *)dummy_var; - *plval = 0xffffffffffffffffLL; + plvar = (__u64 *)&glvar; + *plvar = 0xffffffffffffffffLL; break; } } static void read_var(int len) { - char cval __attribute__((unused)); - short sval __attribute__((unused)); - int ival __attribute__((unused)); - long long lval __attribute__((unused)); + __u8 cvar __attribute__((unused)); + __u16 svar __attribute__((unused)); + __u32 ivar __attribute__((unused)); + __u64 lvar __attribute__((unused)); switch (len) { case 1: - cval = *(char *)dummy_var; + cvar = (__u8)glvar; break; case 2: - sval = *(short *)dummy_var; + svar = (__u16)glvar; break; case 4: - ival = *(int *)dummy_var; + ivar = (__u32)glvar; break; case 8: - lval = *(long long *)dummy_var; + lvar = (__u64)glvar; break; } } -/* - * Do the r/w accesses to trigger the breakpoints. And run - * the usual traps. 
- */ -static void trigger_tests(void) +static void test_workload(void) { - int len, ret; + __u8 cvar __attribute__((unused)); + __u32 ivar __attribute__((unused)); + int len = 0; - ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); - if (ret) { - perror("Can't be traced?\n"); - return; + if (ptrace(PTRACE_TRACEME, 0, NULL, 0)) { + perror("Child can't be traced?"); + exit(-1); } /* Wake up father so that it sets up the first test */ kill(getpid(), SIGUSR1); - /* Test write watchpoints */ - for (len = 1; len <= sizeof(long); len <<= 1) + /* PTRACE_SET_DEBUGREG, WO test */ + for (len = 1; len <= sizeof(glvar); len <<= 1) write_var(len); - /* Test read/write watchpoints (on read accesses) */ - for (len = 1; len <= sizeof(long); len <<= 1) + /* PTRACE_SET_DEBUGREG, RO test */ + for (len = 1; len <= sizeof(glvar); len <<= 1) read_var(len); - /* Test when breakpoint is unset */ - - /* Test write watchpoints */ - for (len = 1; len <= sizeof(long); len <<= 1) - write_var(len); + /* PTRACE_SET_DEBUGREG, RW test */ + for (len = 1; len <= sizeof(glvar); len <<= 1) { + if (rand() % 2) + read_var(len); + else + write_var(len); + } - /* Test read/write watchpoints (on read accesses) */ - for (len = 1; len <= sizeof(long); len <<= 1) - read_var(len); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ + write_var(1); + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */ + read_var(1); + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */ + if (rand() % 2) + write_var(1); + else + read_var(1); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ + gstruct.a[rand() % A_LEN] = 'a'; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */ + cvar = gstruct.a[rand() % A_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */ + if (rand() % 2) + gstruct.a[rand() % A_LEN] = 'a'; + else + cvar = gstruct.a[rand() % A_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */ + gstruct.b[rand() % B_LEN] = 'b'; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW 
UNALIGNED, RO test */ + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */ + if (rand() % 2) + gstruct.b[rand() % B_LEN] = 'b'; + else + cvar = gstruct.b[rand() % B_LEN]; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */ + if (rand() % 2) + *((int *)(gstruct.a + 4)) = 10; + else + ivar = *((int *)(gstruct.a + 4)); + + /* PPC_PTRACE_SETHWDEBUG. DAWR_MAX_LEN. RW test */ + if (rand() % 2) + big_var[rand() % DAWR_MAX_LEN] = 'a'; + else + cvar = big_var[rand() % DAWR_MAX_LEN]; } -static void check_success(const char *msg) +static void check_success(pid_t child_pid, const char *name, const char *type, + unsigned long saddr, int len) { - const char *msg2; int status; + siginfo_t siginfo; + unsigned long eaddr = (saddr + len - 1) | 0x7; + + saddr &= ~0x7; /* Wait for the child to SIGTRAP */ wait(&status); - msg2 = "Failed"; + ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &siginfo); - if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { - msg2 = "Child process hit the breakpoint"; + if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP || + (unsigned long)siginfo.si_addr < saddr || + (unsigned long)siginfo.si_addr > eaddr) { + printf("%s, %s, len: %d: Fail\n", name, type, len); + exit(-1); } - printf("%s Result: [%s]\n", msg, msg2); + printf("%s, %s, len: %d: Ok\n", name, type, len); + + if (!is_8xx) { + /* + * For ptrace registered watchpoint, signal is generated + * before executing load/store. Singlestep the instruction + * and then continue the test. 
+ */ + ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0); + wait(NULL); + } } -static void launch_watchpoints(char *buf, int mode, int len, - struct ppc_debug_info *dbginfo, bool dawr) +static void ptrace_set_debugreg(pid_t child_pid, unsigned long wp_addr) { - const char *mode_str; - unsigned long data = (unsigned long)(dummy_var); - int wh, range; - - data &= ~0x7UL; - - if (mode == BP_W) { - data |= (1UL << 1); - mode_str = "write"; - } else { - data |= (1UL << 0); - data |= (1UL << 1); - mode_str = "read"; + if (ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, wp_addr)) { + perror("PTRACE_SET_DEBUGREG failed"); + exit(-1); } +} - /* Set DABR_TRANSLATION bit */ - data |= (1UL << 2); - - /* use PTRACE_SET_DEBUGREG breakpoints */ - set_breakpoint_addr((void *)data); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); - check_success(buf); - /* Unregister hw brkpoint */ - set_breakpoint_addr(NULL); +static int ptrace_sethwdebug(pid_t child_pid, struct ppc_hw_breakpoint *info) +{ + int wh = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, info); - data = (data & ~7); /* remove dabr control bits */ + if (wh <= 0) { + perror("PPC_PTRACE_SETHWDEBUG failed"); + exit(-1); + } + return wh; +} - /* use PPC_PTRACE_SETHWDEBUG breakpoint */ - if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) - return; /* not supported */ - wh = set_hwbreakpoint_addr((void *)data, 0); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); - check_success(buf); - /* Unregister hw brkpoint */ - del_hwbreakpoint_addr(wh); - - /* try a wider range */ - range = 8; - if (dawr) - range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1)); - wh = set_hwbreakpoint_addr((void *)data, range); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len); - check_success(buf); - /* Unregister hw brkpoint */ - del_hwbreakpoint_addr(wh); +static void 
ptrace_delhwdebug(pid_t child_pid, int wh) +{ + if (ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, wh) < 0) { + perror("PPC_PTRACE_DELHWDEBUG failed"); + exit(-1); + } } -/* Set the breakpoints and check the child successfully trigger them */ -static int launch_tests(bool dawr) +#define DABR_READ_SHIFT 0 +#define DABR_WRITE_SHIFT 1 +#define DABR_TRANSLATION_SHIFT 2 + +static int test_set_debugreg(pid_t child_pid) { - char buf[1024]; - int len, i, status; + unsigned long wp_addr = (unsigned long)&glvar; + char *name = "PTRACE_SET_DEBUGREG"; + int len; + + /* PTRACE_SET_DEBUGREG, WO test*/ + wp_addr &= ~0x7UL; + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + for (len = 1; len <= sizeof(glvar); len <<= 1) { + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + } - struct ppc_debug_info dbginfo; + /* PTRACE_SET_DEBUGREG, RO test */ + wp_addr &= ~0x7UL; + wp_addr |= (1UL << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + for (len = 1; len <= sizeof(glvar); len <<= 1) { + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + } - i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo); - if (i) { - perror("Can't set breakpoint info\n"); - exit(-1); + /* PTRACE_SET_DEBUGREG, RW test */ + wp_addr &= ~0x7UL; + wp_addr |= (1Ul << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + for (len = 1; len <= sizeof(glvar); len <<= 1) { + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); } - if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE)) - printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n"); - /* Write watchpoint */ - for (len = 1; len <= sizeof(long); len <<= 1) - launch_watchpoints(buf, 
BP_W, len, &dbginfo, dawr); + ptrace_set_debugreg(child_pid, 0); + return 0; +} - /* Read-Write watchpoint */ - for (len = 1; len <= sizeof(long); len <<= 1) - launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr); +static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, + unsigned long addr, int len) +{ + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = (__u64)addr; + info->addr2 = (__u64)addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; +} +static void test_sethwdebug_exact(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr = (unsigned long)&glvar; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT"; + int len = 1; /* hardcoded in kernel */ + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); - /* - * Now we have unregistered the breakpoint, access by child - * should not cause SIGTRAP. 
- */ + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); - wait(&status); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} - if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) { - printf("FAIL: Child process hit the breakpoint, which is not expected\n"); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - return TEST_FAIL; - } +static void test_sethwdebug_range_aligned(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ + wp_addr = (unsigned long)&gstruct.a; + len = A_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */ + wp_addr = (unsigned long)&gstruct.a; + len = A_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */ + wp_addr = (unsigned long)&gstruct.a; + len = A_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + wh = 
ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} - if (WIFEXITED(status)) - printf("Child exited normally\n"); +static void test_sethwdebug_range_unaligned(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "WO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RO", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); - return TEST_PASS; +} + +static void test_sethwdebug_range_unaligned_dar(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */ + wp_addr = (unsigned long)&gstruct.b; + len = B_LEN; + get_ppc_hw_breakpoint(&info, 
PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + +static void test_sethwdebug_dawr_max_range(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr; + char *name = "PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN"; + int len; + int wh; + + /* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */ + wp_addr = (unsigned long)big_var; + len = DAWR_MAX_LEN; + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "RW", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + +/* Set the breakpoints and check the child successfully trigger them */ +static void +run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) +{ + test_set_debugreg(child_pid); + if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { + test_sethwdebug_exact(child_pid); + + test_sethwdebug_range_aligned(child_pid); + if (dawr || is_8xx) { + test_sethwdebug_range_unaligned(child_pid); + test_sethwdebug_range_unaligned_dar(child_pid); + test_sethwdebug_dawr_max_range(child_pid); + } + } } static int ptrace_hwbreak(void) { - pid_t pid; - int ret; + pid_t child_pid; + struct ppc_debug_info dbginfo; bool dawr; - pid = fork(); - if (!pid) { - trigger_tests(); + child_pid = fork(); + if (!child_pid) { + test_workload(); return 0; } wait(NULL); - child_pid = pid; + get_dbginfo(child_pid, &dbginfo); + SKIP_IF(dbginfo.num_data_bps == 0); - get_dbginfo(); - SKIP_IF(!hwbreak_present()); - dawr = dawr_present(); - - ret = launch_tests(dawr); + dawr = dawr_present(&dbginfo); + run_tests(child_pid, &dbginfo, dawr); + /* Let the child exit first. */ + ptrace(PTRACE_CONT, child_pid, NULL, 0); wait(NULL); - return ret; + /* + * Testcases exits immediately with -1 on any failure. 
If + * it has reached here, it means all tests were successful. + */ + return TEST_PASS; } int main(int argc, char **argv, char **envp) { + int pvr = 0; + asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR)); + if (pvr == PVR_8xx) + is_8xx = true; + return test_harness(ptrace_hwbreak, "ptrace-hwbreak"); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index 25e23e73c72e..2ecfa1158e2b 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -73,7 +73,7 @@ trans: [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), [tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3) - : "memory", "r0", "r1", "r3", "r4", "r5", "r6" + : "memory", "r0", "r3", "r4", "r5", "r6", "lr" ); /* TM failed, analyse */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index f603fe5a445b..6f7fb51f0809 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -74,8 +74,8 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r3", "r4", - "r7", "r8", "r9", "r10", "r11" + : "memory", "r0", "r3", "r4", + "r7", "r8", "r9", "r10", "r11", "lr" ); if (result) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index e0d37f07bdeb..46ef378a15ec 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -62,7 +62,7 @@ trans: [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1]) - : "memory", "r0", "r1", "r3", 
"r4", "r5", "r6" + : "memory", "r0", "r3", "r4", "r5", "r6" ); /* TM failed, analyse */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 8027457b97b7..70ca01234f79 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -62,8 +62,8 @@ trans: "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1]) - : "memory", "r0", "r1", "r3", "r4", - "r7", "r8", "r9", "r10", "r11" + : "memory", "r0", "r3", "r4", + "r7", "r8", "r9", "r10", "r11", "lr" ); if (result) { diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore new file mode 100644 index 000000000000..0b969fba3beb --- /dev/null +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -0,0 +1 @@ +rfi_flush diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile index 85861c46b445..eadbbff50be6 100644 --- a/tools/testing/selftests/powerpc/security/Makefile +++ b/tools/testing/selftests/powerpc/security/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0+ -TEST_GEN_PROGS := rfi_flush +TEST_GEN_PROGS := rfi_flush spectre_v2 top_srcdir = ../../../../.. CFLAGS += -I../../../../../usr/include @@ -8,3 +8,6 @@ CFLAGS += -I../../../../../usr/include include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c + +$(OUTPUT)/spectre_v2: CFLAGS += -m64 +$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S diff --git a/tools/testing/selftests/powerpc/security/branch_loops.S b/tools/testing/selftests/powerpc/security/branch_loops.S new file mode 100644 index 000000000000..22e9204e3421 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/branch_loops.S @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2019, Michael Ellerman, IBM Corp. 
+ */ + +#include <ppc-asm.h> + + .data + +jump_table: + .long 0x0 + .long (.Lstate_1 - .Lstate_0) + .long (.Lstate_2 - .Lstate_0) + .long (.Lstate_3 - .Lstate_0) + .long (.Lstate_4 - .Lstate_0) + .long (.Lstate_5 - .Lstate_0) + .long (.Lstate_6 - .Lstate_0) + .long (.Lstate_7 - .Lstate_0) + + .text + +#define ITER_SHIFT 31 + +.macro state number + .balign 32 +.Lstate_\number: + .if \number==7 + li r3, 0 + .else + li r3, \number+1 + .endif + b .Lloop +.endm + +FUNC_START(pattern_cache_loop) + li r3, 0 + li r4, 1 + sldi r4, r4, ITER_SHIFT + +.Lloop: cmpdi r4, 0 + beqlr + + addi r4, r4, -1 + + ld r6, jump_table@got(%r2) + sldi r5, r3, 2 + lwax r6, r5, r6 + ld r7, .Lstate_0@got(%r2) + add r6, r6, r7 + mtctr r6 + bctr + + state 0 + state 1 + state 2 + state 3 + state 4 + state 5 + state 6 + state 7 + +FUNC_END(pattern_cache_loop) + + +FUNC_START(indirect_branch_loop) + li r3, 1 + sldi r3, r3, ITER_SHIFT + +1: cmpdi r3, 0 + beqlr + + addi r3, r3, -1 + + ld r4, 2f@got(%r2) + mtctr r4 + bctr + + .balign 32 +2: b 1b + +FUNC_END(indirect_branch_loop) diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c new file mode 100644 index 000000000000..8c6b982af2a8 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Copyright 2018-2019 IBM Corporation. 
+ */ + +#define __SANE_USERSPACE_TYPES__ + +#include <sys/types.h> +#include <stdint.h> +#include <malloc.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <sys/prctl.h> +#include "utils.h" + +#include "../pmu/event.h" + + +extern void pattern_cache_loop(void); +extern void indirect_branch_loop(void); + +static int do_count_loop(struct event *events, bool is_p9, s64 *miss_percent) +{ + u64 pred, mpred; + + prctl(PR_TASK_PERF_EVENTS_ENABLE); + + if (is_p9) + pattern_cache_loop(); + else + indirect_branch_loop(); + + prctl(PR_TASK_PERF_EVENTS_DISABLE); + + event_read(&events[0]); + event_read(&events[1]); + + // We could scale all the events by running/enabled but we're lazy + // As long as the PMU is uncontended they should all run + FAIL_IF(events[0].result.running != events[0].result.enabled); + FAIL_IF(events[1].result.running != events[1].result.enabled); + + pred = events[0].result.value; + mpred = events[1].result.value; + + if (is_p9) { + event_read(&events[2]); + event_read(&events[3]); + FAIL_IF(events[2].result.running != events[2].result.enabled); + FAIL_IF(events[3].result.running != events[3].result.enabled); + + pred += events[2].result.value; + mpred += events[3].result.value; + } + + *miss_percent = 100 * mpred / pred; + + return 0; +} + +static void setup_event(struct event *e, u64 config, char *name) +{ + event_init_named(e, config, name); + + e->attr.disabled = 1; + e->attr.exclude_kernel = 1; + e->attr.exclude_hv = 1; + e->attr.exclude_idle = 1; +} + +enum spectre_v2_state { + VULNERABLE = 0, + UNKNOWN = 1, // Works with FAIL_IF() + NOT_AFFECTED, + BRANCH_SERIALISATION, + COUNT_CACHE_DISABLED, + COUNT_CACHE_FLUSH_SW, + COUNT_CACHE_FLUSH_HW, + BTB_FLUSH, +}; + +static enum spectre_v2_state get_sysfs_state(void) +{ + enum spectre_v2_state state = UNKNOWN; + char buf[256]; + int len; + + memset(buf, 0, sizeof(buf)); + FAIL_IF(read_sysfs_file("devices/system/cpu/vulnerabilities/spectre_v2", buf, 
sizeof(buf))); + + // Make sure it's NULL terminated + buf[sizeof(buf) - 1] = '\0'; + + // Trim the trailing newline + len = strlen(buf); + FAIL_IF(len < 1); + buf[len - 1] = '\0'; + + printf("sysfs reports: '%s'\n", buf); + + // Order matters + if (strstr(buf, "Vulnerable")) + state = VULNERABLE; + else if (strstr(buf, "Not affected")) + state = NOT_AFFECTED; + else if (strstr(buf, "Indirect branch serialisation (kernel only)")) + state = BRANCH_SERIALISATION; + else if (strstr(buf, "Indirect branch cache disabled")) + state = COUNT_CACHE_DISABLED; + else if (strstr(buf, "Software count cache flush (hardware accelerated)")) + state = COUNT_CACHE_FLUSH_HW; + else if (strstr(buf, "Software count cache flush")) + state = COUNT_CACHE_FLUSH_SW; + else if (strstr(buf, "Branch predictor state flush")) + state = BTB_FLUSH; + + return state; +} + +#define PM_BR_PRED_CCACHE 0x040a4 // P8 + P9 +#define PM_BR_MPRED_CCACHE 0x040ac // P8 + P9 +#define PM_BR_PRED_PCACHE 0x048a0 // P9 only +#define PM_BR_MPRED_PCACHE 0x048b0 // P9 only + +#define SPRN_PVR 287 + +int spectre_v2_test(void) +{ + enum spectre_v2_state state; + struct event events[4]; + s64 miss_percent; + bool is_p9; + + state = get_sysfs_state(); + if (state == UNKNOWN) { + printf("Error: couldn't determine spectre_v2 mitigation state?\n"); + return -1; + } + + memset(events, 0, sizeof(events)); + + setup_event(&events[0], PM_BR_PRED_CCACHE, "PM_BR_PRED_CCACHE"); + setup_event(&events[1], PM_BR_MPRED_CCACHE, "PM_BR_MPRED_CCACHE"); + FAIL_IF(event_open(&events[0])); + FAIL_IF(event_open_with_group(&events[1], events[0].fd) == -1); + + is_p9 = ((mfspr(SPRN_PVR) >> 16) & 0xFFFF) == 0x4e; + + if (is_p9) { + // Count pattern cache too + setup_event(&events[2], PM_BR_PRED_PCACHE, "PM_BR_PRED_PCACHE"); + setup_event(&events[3], PM_BR_MPRED_PCACHE, "PM_BR_MPRED_PCACHE"); + + FAIL_IF(event_open_with_group(&events[2], events[0].fd) == -1); + FAIL_IF(event_open_with_group(&events[3], events[0].fd) == -1); + } + + 
FAIL_IF(do_count_loop(events, is_p9, &miss_percent)); + + event_report_justified(&events[0], 18, 10); + event_report_justified(&events[1], 18, 10); + event_close(&events[0]); + event_close(&events[1]); + + if (is_p9) { + event_report_justified(&events[2], 18, 10); + event_report_justified(&events[3], 18, 10); + event_close(&events[2]); + event_close(&events[3]); + } + + printf("Miss percent %lld %%\n", miss_percent); + + switch (state) { + case VULNERABLE: + case NOT_AFFECTED: + case COUNT_CACHE_FLUSH_SW: + case COUNT_CACHE_FLUSH_HW: + // These should all not affect userspace branch prediction + if (miss_percent > 15) { + printf("Branch misses > 15%% unexpected in this configuration!\n"); + printf("Possible mis-match between reported & actual mitigation\n"); + return 1; + } + break; + case BRANCH_SERIALISATION: + // This seems to affect userspace branch prediction a bit? + if (miss_percent > 25) { + printf("Branch misses > 25%% unexpected in this configuration!\n"); + printf("Possible mis-match between reported & actual mitigation\n"); + return 1; + } + break; + case COUNT_CACHE_DISABLED: + if (miss_percent < 95) { + printf("Branch misses < 20%% unexpected in this configuration!\n"); + printf("Possible mis-match between reported & actual mitigation\n"); + return 1; + } + break; + case UNKNOWN: + case BTB_FLUSH: + printf("Not sure!\n"); + return 1; + } + + printf("OK - Measured branch prediction rates match reported spectre v2 mitigation.\n"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(spectre_v2_test, "spectre_v2"); +} diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index dade00c698c2..08f9afe3b95c 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -42,7 +42,7 @@ #include "utils.h" /* Selftest defaults */ -#define COUNT_MAX 4000 /* Number of interactions */ +#define COUNT_MAX 600 /* Number of 
interactions */ #define THREADS 16 /* Number of threads */ /* Arguments options */ diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore index 0b43da74ee46..31a17e0ba884 100644 --- a/tools/testing/selftests/powerpc/stringloops/.gitignore +++ b/tools/testing/selftests/powerpc/stringloops/.gitignore @@ -1 +1,4 @@ -memcmp +memcmp_64 +memcmp_32 +strlen +strlen_32 diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 951fe855f7cd..98f2708d86cc 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -17,3 +17,4 @@ tm-vmx-unavail tm-unavailable tm-trap tm-sigreturn +tm-poison diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index c0734ed0ef56..b15a1a325bd0 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -5,7 +5,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \ - tm-signal-context-force-tm + tm-signal-context-force-tm tm-poison top_srcdir = ../../../../.. include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c new file mode 100644 index 000000000000..977558497c16 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -0,0 +1,179 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2019, Gustavo Romero, Michael Neuling, IBM Corp. + * + * This test will spawn two processes. Both will be attached to the same + * CPU (CPU 0). 
The child will be in a loop writing to FP register f31 and + * VMX/VEC/Altivec register vr31 a known value, called poison, calling + * sched_yield syscall after to allow the parent to switch on the CPU. + * Parent will set f31 and vr31 to 1 and in a loop will check if f31 and + * vr31 remain 1 as expected until a given timeout (2m). If the issue is + * present child's poison will leak into parent's f31 or vr31 registers, + * otherwise, poison will never leak into parent's f31 and vr31 registers. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <inttypes.h> +#include <sched.h> +#include <sys/types.h> +#include <signal.h> +#include <inttypes.h> + +#include "tm.h" + +int tm_poison_test(void) +{ + int pid; + cpu_set_t cpuset; + uint64_t poison = 0xdeadbeefc0dec0fe; + uint64_t unknown = 0; + bool fail_fp = false; + bool fail_vr = false; + + SKIP_IF(!have_htm()); + + /* Attach both Child and Parent to CPU 0 */ + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + sched_setaffinity(0, sizeof(cpuset), &cpuset); + + pid = fork(); + if (!pid) { + /** + * child + */ + while (1) { + sched_yield(); + asm ( + "mtvsrd 31, %[poison];" // f31 = poison + "mtvsrd 63, %[poison];" // vr31 = poison + + : : [poison] "r" (poison) : ); + } + } + + /** + * parent + */ + asm ( + /* + * Set r3, r4, and f31 to known value 1 before entering + * in transaction. They won't be written after that. + */ + " li 3, 0x1 ;" + " li 4, 0x1 ;" + " mtvsrd 31, 4 ;" + + /* + * The Time Base (TB) is a 64-bit counter register that is + * independent of the CPU clock and which is incremented + * at a frequency of 512000000 Hz, so every 1.953125ns. + * So it's necessary 120s/0.000000001953125s = 61440000000 + * increments to get a 2 minutes timeout. Below we set that + * value in r5 and then use r6 to track initial TB value, + * updating TB values in r7 at every iteration and comparing it + * to r6. 
When r7 (current) - r6 (initial) > 61440000000 we bail + * out since for sure we spent already 2 minutes in the loop. + * SPR 268 is the TB register. + */ + " lis 5, 14 ;" + " ori 5, 5, 19996 ;" + " sldi 5, 5, 16 ;" // r5 = 61440000000 + + " mfspr 6, 268 ;" // r6 (TB initial) + "1: mfspr 7, 268 ;" // r7 (TB current) + " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ? + " cmpd 7, 5 ;" + " bgt 3f ;" // yes, exit + + /* + * Main loop to check f31 + */ + " tbegin. ;" // no, try again + " beq 1b ;" // restart if no timeout + " mfvsrd 3, 31 ;" // read f31 + " cmpd 3, 4 ;" // f31 == 1 ? + " bne 2f ;" // broken :-( + " tabort. 3 ;" // try another transaction + "2: tend. ;" // commit transaction + "3: mr %[unknown], 3 ;" // record r3 + + : [unknown] "=r" (unknown) + : + : "cr0", "r3", "r4", "r5", "r6", "r7", "vs31" + + ); + + /* + * On leak 'unknown' will contain 'poison' value from child, + * otherwise (no leak) 'unknown' will contain the same value + * as r3 before entering in transactional mode, i.e. 0x1. + */ + fail_fp = unknown != 0x1; + if (fail_fp) + printf("Unknown value %#"PRIx64" leaked into f31!\n", unknown); + else + printf("Good, no poison or leaked value into FP registers\n"); + + asm ( + /* + * Set r3, r4, and vr31 to known value 1 before entering + * in transaction. They won't be written after that. + */ + " li 3, 0x1 ;" + " li 4, 0x1 ;" + " mtvsrd 63, 4 ;" + + " lis 5, 14 ;" + " ori 5, 5, 19996 ;" + " sldi 5, 5, 16 ;" // r5 = 61440000000 + + " mfspr 6, 268 ;" // r6 (TB initial) + "1: mfspr 7, 268 ;" // r7 (TB current) + " subf 7, 6, 7 ;" // r7 - r6 > 61440000000 ? + " cmpd 7, 5 ;" + " bgt 3f ;" // yes, exit + + /* + * Main loop to check vr31 + */ + " tbegin. ;" // no, try again + " beq 1b ;" // restart if no timeout + " mfvsrd 3, 63 ;" // read vr31 + " cmpd 3, 4 ;" // vr31 == 1 ? + " bne 2f ;" // broken :-( + " tabort. 3 ;" // try another transaction + "2: tend. 
;" // commit transaction + "3: mr %[unknown], 3 ;" // record r3 + + : [unknown] "=r" (unknown) + : + : "cr0", "r3", "r4", "r5", "r6", "r7", "vs63" + + ); + + /* + * On leak 'unknown' will contain 'poison' value from child, + * otherwise (no leak) 'unknown' will contain the same value + * as r3 before entering in transactional mode, i.e. 0x1. + */ + fail_vr = unknown != 0x1; + if (fail_vr) + printf("Unknown value %#"PRIx64" leaked into vr31!\n", unknown); + else + printf("Good, no poison or leaked value into VEC registers\n"); + + kill(pid, SIGKILL); + + return (fail_fp | fail_vr); +} + +int main(int argc, char *argv[]) +{ + /* Test completes in about 4m */ + test_harness_set_timeout(250); + return test_harness(tm_poison_test, "tm_poison_test"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c index d57c2d2ab6ec..254f912ad611 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c @@ -5,10 +5,11 @@ * Test the kernel's signal frame code. * * The kernel sets up two sets of ucontexts if the signal was to be - * delivered while the thread was in a transaction. + * delivered while the thread was in a transaction (referred to as + * first and second contexts). * Expected behaviour is that the checkpointed state is in the user - * context passed to the signal handler. The speculated state can be - * accessed with the uc_link pointer. + * context passed to the signal handler (first context). The speculated + * state can be accessed with the uc_link pointer (second context). 
* * The rationale for this is that if TM unaware code (which linked * against TM libs) installs a signal handler it will not know of the @@ -28,17 +29,20 @@ #define MAX_ATTEMPT 500000 -#define NV_FPU_REGS 18 +#define NV_FPU_REGS 18 /* Number of non-volatile FP registers */ +#define FPR14 14 /* First non-volatile FP register to check in f14-31 subset */ long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); -/* Be sure there are 2x as many as there are NV FPU regs (2x18) */ +/* Test only non-volatile registers, i.e. 18 fpr registers from f14 to f31 */ static double fps[] = { + /* First context will be set with these values, i.e. non-speculative */ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + /* Second context will be set with these values, i.e. speculative */ -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18 }; -static sig_atomic_t fail; +static sig_atomic_t fail, broken; static void signal_usr1(int signum, siginfo_t *info, void *uc) { @@ -46,11 +50,24 @@ static void signal_usr1(int signum, siginfo_t *info, void *uc) ucontext_t *ucp = uc; ucontext_t *tm_ucp = ucp->uc_link; - for (i = 0; i < NV_FPU_REGS && !fail; i++) { - fail = (ucp->uc_mcontext.fp_regs[i + 14] != fps[i]); - fail |= (tm_ucp->uc_mcontext.fp_regs[i + 14] != fps[i + NV_FPU_REGS]); - if (fail) - printf("Failed on %d FP %g or %g\n", i, ucp->uc_mcontext.fp_regs[i + 14], tm_ucp->uc_mcontext.fp_regs[i + 14]); + for (i = 0; i < NV_FPU_REGS; i++) { + /* Check first context. Print all mismatches. */ + fail = (ucp->uc_mcontext.fp_regs[FPR14 + i] != fps[i]); + if (fail) { + broken = 1; + printf("FPR%d (1st context) == %g instead of %g (expected)\n", + FPR14 + i, ucp->uc_mcontext.fp_regs[FPR14 + i], fps[i]); + } + } + + for (i = 0; i < NV_FPU_REGS; i++) { + /* Check second context. Print all mismatches. 
*/ + fail = (tm_ucp->uc_mcontext.fp_regs[FPR14 + i] != fps[NV_FPU_REGS + i]); + if (fail) { + broken = 1; + printf("FPR%d (2nd context) == %g instead of %g (expected)\n", + FPR14 + i, tm_ucp->uc_mcontext.fp_regs[FPR14 + i], fps[NV_FPU_REGS + i]); + } } } @@ -72,13 +89,19 @@ static int tm_signal_context_chk_fpu() } i = 0; - while (i < MAX_ATTEMPT && !fail) { + while (i < MAX_ATTEMPT && !broken) { + /* + * tm_signal_self_context_load will set both first and second + * contexts according to the values passed through non-NULL + * array pointers to it, in that case 'fps', and invoke the + * signal handler installed for SIGUSR1. + */ rc = tm_signal_self_context_load(pid, NULL, fps, NULL, NULL); FAIL_IF(rc != pid); i++; } - return fail; + return (broken); } int main(void) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c index 4d05f8b0254c..0cc680f61828 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c @@ -5,10 +5,11 @@ * Test the kernel's signal frame code. * * The kernel sets up two sets of ucontexts if the signal was to be - * delivered while the thread was in a transaction. + * delivered while the thread was in a transaction (referred to as + * first and second contexts). * Expected behaviour is that the checkpointed state is in the user - * context passed to the signal handler. The speculated state can be - * accessed with the uc_link pointer. + * context passed to the signal handler (first context). The speculated + * state can be accessed with the uc_link pointer (second context). 
* * The rationale for this is that if TM unaware code (which linked * against TM libs) installs a signal handler it will not know of the @@ -28,14 +29,22 @@ #define MAX_ATTEMPT 500000 -#define NV_GPR_REGS 18 +#define NV_GPR_REGS 18 /* Number of non-volatile GPR registers */ +#define R14 14 /* First non-volatile register to check in r14-r31 subset */ long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); -static sig_atomic_t fail; +static sig_atomic_t fail, broken; -static long gps[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18}; +/* Test only non-volatile general purpose registers, i.e. r14-r31 */ +static long gprs[] = { + /* First context will be set with these values, i.e. non-speculative */ + /* R14, R15, ... */ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, + /* Second context will be set with these values, i.e. speculative */ + /* R14, R15, ... */ + -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18 +}; static void signal_usr1(int signum, siginfo_t *info, void *uc) { @@ -43,12 +52,24 @@ static void signal_usr1(int signum, siginfo_t *info, void *uc) ucontext_t *ucp = uc; ucontext_t *tm_ucp = ucp->uc_link; - for (i = 0; i < NV_GPR_REGS && !fail; i++) { - fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]); - fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + NV_GPR_REGS]); - if (fail) - printf("Failed on %d GPR %lu or %lu\n", i, - ucp->uc_mcontext.gp_regs[i + 14], tm_ucp->uc_mcontext.gp_regs[i + 14]); + /* Check first context. Print all mismatches. */ + for (i = 0; i < NV_GPR_REGS; i++) { + fail = (ucp->uc_mcontext.gp_regs[R14 + i] != gprs[i]); + if (fail) { + broken = 1; + printf("GPR%d (1st context) == %lu instead of %lu (expected)\n", + R14 + i, ucp->uc_mcontext.gp_regs[R14 + i], gprs[i]); + } + } + + /* Check second context. Print all mismatches. 
*/ + for (i = 0; i < NV_GPR_REGS; i++) { + fail = (tm_ucp->uc_mcontext.gp_regs[R14 + i] != gprs[NV_GPR_REGS + i]); + if (fail) { + broken = 1; + printf("GPR%d (2nd context) == %lu instead of %lu (expected)\n", + R14 + i, tm_ucp->uc_mcontext.gp_regs[R14 + i], gprs[NV_GPR_REGS + i]); + } } } @@ -70,13 +91,19 @@ static int tm_signal_context_chk_gpr() } i = 0; - while (i < MAX_ATTEMPT && !fail) { - rc = tm_signal_self_context_load(pid, gps, NULL, NULL, NULL); + while (i < MAX_ATTEMPT && !broken) { + /* + * tm_signal_self_context_load will set both first and second + * contexts according to the values passed through non-NULL + * array pointers to it, in that case 'gprs', and invoke the + * signal handler installed for SIGUSR1. + */ + rc = tm_signal_self_context_load(pid, gprs, NULL, NULL, NULL); FAIL_IF(rc != pid); i++; } - return fail; + return broken; } int main(void) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c index 48ad01499b1a..b6d52730a0d8 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c @@ -5,10 +5,11 @@ * Test the kernel's signal frame code. * * The kernel sets up two sets of ucontexts if the signal was to be - * delivered while the thread was in a transaction. + * delivered while the thread was in a transaction (referred to as + * first and second contexts). * Expected behaviour is that the checkpointed state is in the user - * context passed to the signal handler. The speculated state can be - * accessed with the uc_link pointer. + * context passed to the signal handler (first context). The speculated + * state can be accessed with the uc_link pointer (second context). 
* * The rationale for this is that if TM unaware code (which linked * against TM libs) installs a signal handler it will not know of the @@ -29,18 +30,24 @@ #define MAX_ATTEMPT 500000 -#define NV_VMX_REGS 12 +#define NV_VMX_REGS 12 /* Number of non-volatile VMX registers */ +#define VMX20 20 /* First non-volatile register to check in vr20-31 subset */ long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); -static sig_atomic_t fail; +static sig_atomic_t fail, broken; +/* Test only non-volatile registers, i.e. 12 vmx registers from vr20 to vr31 */ vector int vms[] = { - {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12}, + /* First context will be set with these values, i.e. non-speculative */ + /* VMX20 , VMX21 , ... */ + { 1, 2, 3, 4},{ 5, 6, 7, 8},{ 9,10,11,12}, {13,14,15,16},{17,18,19,20},{21,22,23,24}, {25,26,27,28},{29,30,31,32},{33,34,35,36}, {37,38,39,40},{41,42,43,44},{45,46,47,48}, - {-1, -2, -3, -4}, {-5, -6, -7, -8}, {-9, -10,-11,-12}, + /* Second context will be set with these values, i.e. speculative */ + /* VMX20 , VMX21 , ... */ + { -1, -2, -3, -4},{ -5, -6, -7, -8},{ -9,-10,-11,-12}, {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24}, {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36}, {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48} @@ -48,26 +55,43 @@ vector int vms[] = { static void signal_usr1(int signum, siginfo_t *info, void *uc) { - int i; + int i, j; ucontext_t *ucp = uc; ucontext_t *tm_ucp = ucp->uc_link; - for (i = 0; i < NV_VMX_REGS && !fail; i++) { - fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[i + 20], + for (i = 0; i < NV_VMX_REGS; i++) { + /* Check first context. Print all mismatches. 
*/ + fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[VMX20 + i], &vms[i], sizeof(vector int)); - fail |= memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[i + 20], - &vms[i + NV_VMX_REGS], sizeof (vector int)); - if (fail) { - int j; + broken = 1; + printf("VMX%d (1st context) == 0x", VMX20 + i); + /* Print actual value in first context. */ + for (j = 0; j < 4; j++) + printf("%08x", ucp->uc_mcontext.v_regs->vrregs[VMX20 + i][j]); + printf(" instead of 0x"); + /* Print expected value. */ + for (j = 0; j < 4; j++) + printf("%08x", vms[i][j]); + printf(" (expected)\n"); + } + } - fprintf(stderr, "Failed on %d vmx 0x", i); + for (i = 0; i < NV_VMX_REGS; i++) { + /* Check second context. Print all mismatches. */ + fail = memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[VMX20 + i], + &vms[NV_VMX_REGS + i], sizeof (vector int)); + if (fail) { + broken = 1; + printf("VMX%d (2nd context) == 0x", NV_VMX_REGS + i); + /* Print actual value in second context. */ + for (j = 0; j < 4; j++) + printf("%08x", tm_ucp->uc_mcontext.v_regs->vrregs[VMX20 + i][j]); + printf(" instead of 0x"); + /* Print expected value. */ for (j = 0; j < 4; j++) - fprintf(stderr, "%04x", ucp->uc_mcontext.v_regs->vrregs[i + 20][j]); - fprintf(stderr, " vs 0x"); - for (j = 0 ; j < 4; j++) - fprintf(stderr, "%04x", tm_ucp->uc_mcontext.v_regs->vrregs[i + 20][j]); - fprintf(stderr, "\n"); + printf("%08x", vms[NV_VMX_REGS + i][j]); + printf(" (expected)\n"); } } } @@ -90,13 +114,19 @@ static int tm_signal_context_chk() } i = 0; - while (i < MAX_ATTEMPT && !fail) { + while (i < MAX_ATTEMPT && !broken) { + /* + * tm_signal_self_context_load will set both first and second + * contexts accordingly to the values passed through non-NULL + * array pointers to it, in that case 'vms', and invoke the + * signal handler installed for SIGUSR1. 
+ */ rc = tm_signal_self_context_load(pid, NULL, NULL, vms, NULL); FAIL_IF(rc != pid); i++; } - return fail; + return (broken); } int main(void) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c index 8c8677a408bb..8e25e2072ecd 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c @@ -5,10 +5,11 @@ * Test the kernel's signal frame code. * * The kernel sets up two sets of ucontexts if the signal was to be - * delivered while the thread was in a transaction. + * delivered while the thread was in a transaction (referred too as + * first and second contexts). * Expected behaviour is that the checkpointed state is in the user - * context passed to the signal handler. The speculated state can be - * accessed with the uc_link pointer. + * context passed to the signal handler (first context). The speculated + * state can be accessed with the uc_link pointer (second context). * * The rationale for this is that if TM unaware code (which linked * against TM libs) installs a signal handler it will not know of the @@ -29,17 +30,24 @@ #define MAX_ATTEMPT 500000 -#define NV_VSX_REGS 12 +#define NV_VSX_REGS 12 /* Number of VSX registers to check. */ +#define VSX20 20 /* First VSX register to check in vsr20-vsr31 subset */ +#define FPR20 20 /* FPR20 overlaps VSX20 most significant doubleword */ long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss); -static sig_atomic_t fail; +static sig_atomic_t fail, broken; -vector int vss[] = { - {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12}, +/* Test only 12 vsx registers from vsr20 to vsr31 */ +vector int vsxs[] = { + /* First context will be set with these values, i.e. non-speculative */ + /* VSX20 , VSX21 , ... 
*/ + { 1, 2, 3, 4},{ 5, 6, 7, 8},{ 9,10,11,12}, {13,14,15,16},{17,18,19,20},{21,22,23,24}, {25,26,27,28},{29,30,31,32},{33,34,35,36}, {37,38,39,40},{41,42,43,44},{45,46,47,48}, + /* Second context will be set with these values, i.e. speculative */ + /* VSX20 , VSX21 , ... */ {-1, -2, -3, -4 },{-5, -6, -7, -8 },{-9, -10,-11,-12}, {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24}, {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36}, @@ -48,41 +56,91 @@ vector int vss[] = { static void signal_usr1(int signum, siginfo_t *info, void *uc) { - int i; - uint8_t vsc[sizeof(vector int)]; - uint8_t vst[sizeof(vector int)]; + int i, j; + uint8_t vsx[sizeof(vector int)]; + uint8_t vsx_tm[sizeof(vector int)]; ucontext_t *ucp = uc; ucontext_t *tm_ucp = ucp->uc_link; /* - * The other half of the VSX regs will be after v_regs. + * FP registers and VMX registers overlap the VSX registers. + * + * FP registers (f0-31) overlap the most significant 64 bits of VSX + * registers vsr0-31, whilst VMX registers vr0-31, being 128-bit like + * the VSX registers, overlap fully the other half of VSX registers, + * i.e. vr0-31 overlaps fully vsr32-63. + * + * Due to compatibility and historical reasons (VMX/Altivec support + * appeared first on the architecture), VMX registers vr0-31 (so VSX + * half vsr32-63 too) are stored right after the v_regs pointer, in an + * area allocated for 'vmx_reverse' array (please see + * arch/powerpc/include/uapi/asm/sigcontext.h for details about the + * mcontext_t structure on Power). + * + * The other VSX half (vsr0-31) is hence stored below vr0-31/vsr32-63 + * registers, but only the least significant 64 bits of vsr0-31. The + * most significant 64 bits of vsr0-31 (f0-31), as it overlaps the FP + * registers, is kept in fp_regs. 
+ * + * v_regs is a 16 byte aligned pointer at the start of vmx_reserve + * (vmx_reserve may or may not be 16 aligned) where the v_regs structure + * exists, so v_regs points to where vr0-31 / vsr32-63 registers are + * fully stored. Since v_regs type is elf_vrregset_t, v_regs + 1 + * skips all the slots used to store vr0-31 / vsr32-64 and points to + * part of one VSX half, i.e. v_regs + 1 points to the least significant + * 64 bits of vsr0-31. The other part of this half (the most significant + * part of vsr0-31) is stored in fp_regs. * - * In short, vmx_reserve array holds everything. v_regs is a 16 - * byte aligned pointer at the start of vmx_reserve (vmx_reserve - * may or may not be 16 aligned) where the v_regs structure exists. - * (half of) The VSX regsters are directly after v_regs so the - * easiest way to find them below. */ + /* Get pointer to least significant doubleword of vsr0-31 */ long *vsx_ptr = (long *)(ucp->uc_mcontext.v_regs + 1); long *tm_vsx_ptr = (long *)(tm_ucp->uc_mcontext.v_regs + 1); - for (i = 0; i < NV_VSX_REGS && !fail; i++) { - memcpy(vsc, &ucp->uc_mcontext.fp_regs[i + 20], 8); - memcpy(vsc + 8, &vsx_ptr[20 + i], 8); - fail = memcmp(vsc, &vss[i], sizeof(vector int)); - memcpy(vst, &tm_ucp->uc_mcontext.fp_regs[i + 20], 8); - memcpy(vst + 8, &tm_vsx_ptr[20 + i], 8); - fail |= memcmp(vst, &vss[i + NV_VSX_REGS], sizeof(vector int)); - if (fail) { - int j; + /* Check first context. Print all mismatches. */ + for (i = 0; i < NV_VSX_REGS; i++) { + /* + * Copy VSX most significant doubleword from fp_regs and + * copy VSX least significant one from 64-bit slots below + * saved VMX registers. 
+ */ + memcpy(vsx, &ucp->uc_mcontext.fp_regs[FPR20 + i], 8); + memcpy(vsx + 8, &vsx_ptr[VSX20 + i], 8); + + fail = memcmp(vsx, &vsxs[i], sizeof(vector int)); - fprintf(stderr, "Failed on %d vsx 0x", i); + if (fail) { + broken = 1; + printf("VSX%d (1st context) == 0x", VSX20 + i); for (j = 0; j < 16; j++) - fprintf(stderr, "%02x", vsc[j]); - fprintf(stderr, " vs 0x"); + printf("%02x", vsx[j]); + printf(" instead of 0x"); + for (j = 0; j < 4; j++) + printf("%08x", vsxs[i][j]); + printf(" (expected)\n"); + } + } + + /* Check second context. Print all mismatches. */ + for (i = 0; i < NV_VSX_REGS; i++) { + /* + * Copy VSX most significant doubleword from fp_regs and + * copy VSX least significant one from 64-bit slots below + * saved VMX registers. + */ + memcpy(vsx_tm, &tm_ucp->uc_mcontext.fp_regs[FPR20 + i], 8); + memcpy(vsx_tm + 8, &tm_vsx_ptr[VSX20 + i], 8); + + fail = memcmp(vsx_tm, &vsxs[NV_VSX_REGS + i], sizeof(vector int)); + + if (fail) { + broken = 1; + printf("VSX%d (2nd context) == 0x", VSX20 + i); for (j = 0; j < 16; j++) - fprintf(stderr, "%02x", vst[j]); - fprintf(stderr, "\n"); + printf("%02x", vsx_tm[j]); + printf(" instead of 0x"); + for (j = 0; j < 4; j++) + printf("%08x", vsxs[NV_VSX_REGS + i][j]); + printf("(expected)\n"); } } } @@ -105,13 +163,19 @@ static int tm_signal_context_chk() } i = 0; - while (i < MAX_ATTEMPT && !fail) { - rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vss); + while (i < MAX_ATTEMPT && !broken) { + /* + * tm_signal_self_context_load will set both first and second + * contexts accordingly to the values passed through non-NULL + * array pointers to it, in that case 'vsxs', and invoke the + * signal handler installed for SIGUSR1. 
+ */ + rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vsxs); FAIL_IF(rc != pid); i++; } - return fail; + return (broken); } int main(void) diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 56fbf9f6bbf3..07c388147b75 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -10,10 +10,12 @@ */ #define _GNU_SOURCE +#include <stdio.h> #include <stdlib.h> #include <signal.h> #include "utils.h" +#include "tm.h" void trap_signal_handler(int signo, siginfo_t *si, void *uc) { @@ -29,6 +31,8 @@ int tm_signal_sigreturn_nt(void) { struct sigaction trap_sa; + SKIP_IF(!have_htm()); + trap_sa.sa_flags = SA_SIGINFO; trap_sa.sa_sigaction = trap_signal_handler; diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index 97f9f491c541..c402464b038f 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -55,7 +55,8 @@ static inline bool failure_is_unavailable(void) static inline bool failure_is_reschedule(void) { if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED || - (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED) + (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED || + (failure_code() & TM_CAUSE_KVM_FAC_UNAV) == TM_CAUSE_KVM_FAC_UNAV) return true; return false; diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index c02d24835db4..5ee0e98c4896 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -127,6 +127,26 @@ bool is_ppc64le(void) return strcmp(uts.machine, "ppc64le") == 0; } +int read_sysfs_file(char *fpath, char *result, size_t result_size) +{ + char path[PATH_MAX] = "/sys/"; + int rc = -1, fd; + + strncat(path, fpath, PATH_MAX - strlen(path) - 1); + + if ((fd = open(path, 
O_RDONLY)) < 0) + return rc; + + rc = read(fd, result, result_size); + + close(fd); + + if (rc < 0) + return rc; + + return 0; +} + int read_debugfs_file(char *debugfs_file, int *result) { int rc = -1, fd; diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c index 47b7473dedef..e6aa00a183bc 100644 --- a/tools/testing/selftests/proc/proc-self-map-files-002.c +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -47,7 +47,11 @@ static void fail(const char *fmt, unsigned long a, unsigned long b) int main(void) { const int PAGE_SIZE = sysconf(_SC_PAGESIZE); - const unsigned long va_max = 1UL << 32; + /* + * va_max must be enough bigger than vm.mmap_min_addr, which is + * 64KB/32KB by default. (depends on CONFIG_LSM_MMAP_MIN_ADDR) + */ + const unsigned long va_max = 1UL << 20; unsigned long va; void *p; int fd; diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index bd4a7247b44f..c0dd10257df5 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -44,6 +44,46 @@ static int clock_adjtime(clockid_t id, struct timex *tx) } #endif +static void show_flag_test(int rq_index, unsigned int flags, int err) +{ + printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n", + rq_index ? '1' + rq_index : ' ', + flags, err, strerror(errno)); + /* sigh, uClibc ... 
*/ + errno = 0; +} + +static void do_flag_test(int fd, unsigned int index) +{ + struct ptp_extts_request extts_request; + unsigned long request[2] = { + PTP_EXTTS_REQUEST, + PTP_EXTTS_REQUEST2, + }; + unsigned int enable_flags[5] = { + PTP_ENABLE_FEATURE, + PTP_ENABLE_FEATURE | PTP_RISING_EDGE, + PTP_ENABLE_FEATURE | PTP_FALLING_EDGE, + PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE, + PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1), + }; + int err, i, j; + + memset(&extts_request, 0, sizeof(extts_request)); + extts_request.index = index; + + for (i = 0; i < 2; i++) { + for (j = 0; j < 5; j++) { + extts_request.flags = enable_flags[j]; + err = ioctl(fd, request[i], &extts_request); + show_flag_test(i, extts_request.flags, err); + + extts_request.flags = 0; + err = ioctl(fd, request[i], &extts_request); + } + } +} + static clockid_t get_clockid(int fd) { #define CLOCKFD 3 @@ -96,7 +136,8 @@ static void usage(char *progname) " -s set the ptp clock time from the system time\n" " -S set the system time from the ptp clock time\n" " -t val shift the ptp clock time by 'val' seconds\n" - " -T val set the ptp clock time to 'val' seconds\n", + " -T val set the ptp clock time to 'val' seconds\n" + " -z test combinations of rising/falling external time stamp flags\n", progname); } @@ -122,6 +163,7 @@ int main(int argc, char *argv[]) int adjtime = 0; int capabilities = 0; int extts = 0; + int flagtest = 0; int gettime = 0; int index = 0; int list_pins = 0; @@ -138,7 +180,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 
1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) { switch (c) { case 'c': capabilities = 1; @@ -191,6 +233,9 @@ int main(int argc, char *argv[]) settime = 3; seconds = atoi(optarg); break; + case 'z': + flagtest = 1; + break; case 'h': usage(progname); return 0; @@ -322,6 +367,10 @@ int main(int argc, char *argv[]) } } + if (flagtest) { + do_flag_test(fd, index); + } + if (list_pins) { int n_pins = 0; if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) { diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh index 4e9485590c10..1dbfb62567d2 100755 --- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh +++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh @@ -15,8 +15,15 @@ then exit 0 fi ncpus=`grep '^processor' /proc/cpuinfo | wc -l` -idlecpus=`mpstat | tail -1 | \ - awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` +if mpstat -V > /dev/null 2>&1 +then + idlecpus=`mpstat | tail -1 | \ + awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'` +else + # No mpstat command, so use all available CPUs. + echo The mpstat command is not available, so greedily using all CPUs. 
+ idlecpus=$ncpus +fi awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null ' BEGIN { cpus2use = idlecpus; diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index dc49a3ba6111..30cb5b27d32e 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -23,25 +23,39 @@ spinmax=${4-1000} n=1 -starttime=`awk 'BEGIN { print systime(); }' < /dev/null` +starttime=`gawk 'BEGIN { print systime(); }' < /dev/null` + +nohotplugcpus= +for i in /sys/devices/system/cpu/cpu[0-9]* +do + if test -f $i/online + then + : + else + curcpu=`echo $i | sed -e 's/^[^0-9]*//'` + nohotplugcpus="$nohotplugcpus $curcpu" + fi +done while : do # Check for done. - t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null` + t=`gawk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null` if test "$t" -gt "$duration" then exit 0; fi # Set affinity to randomly selected online CPU - cpus=`grep 1 /sys/devices/system/cpu/*/online | - sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'` - - # Do not leave out poor old cpu0 which may not be hot-pluggable - if [ ! 
-f "/sys/devices/system/cpu/cpu0/online" ]; then - cpus="0 $cpus" + if cpus=`grep 1 /sys/devices/system/cpu/*/online 2>&1 | + sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'` + then + : + else + cpus= fi + # Do not leave out non-hot-pluggable CPUs + cpus="$cpus $nohotplugcpus" cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN { srand(n + me + systime()); diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index 2a7f3f4756a7..9d9a41625dd9 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -25,6 +25,7 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^\[[ 0-9.]*] //' | awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | tr -d '\012\015'`" +fwdprog="`grep 'rcu_torture_fwd_prog_cr Duration' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k15nr | head -1 | awk '{ print $14 " " $15 }'`" if test -z "$ngps" then echo "$configfile ------- " $stopstate @@ -39,7 +40,7 @@ else BEGIN { print ngps / dur }' < /dev/null` title="$title ($ngpsps/s)" fi - echo $title $stopstate + echo $title $stopstate $fwdprog nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` if test -z "$nclosecalls" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 27b7b5693ede..e0352304b98b 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -123,7 +123,7 @@ qemu_args=$5 boot_args=$6 cd $KVM -kstarttime=`awk 'BEGIN { print systime() }' < /dev/null` +kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` if test -z "$TORTURE_BUILDONLY" then echo ' ---' `date`: Starting kernel @@ -133,11 +133,10 @@ fi 
qemu_args="-enable-kvm -nographic $qemu_args" cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment` cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"` -vcpus=`identify_qemu_vcpus` -if test $cpu_count -gt $vcpus +if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS" then - echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings - cpu_count=$vcpus + echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings + cpu_count=$TORTURE_ALLOTED_CPUS fi qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`" @@ -177,7 +176,7 @@ do then qemu_pid=`cat "$resdir/qemu_pid"` fi - kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1 then if test $kruntime -ge $seconds @@ -213,7 +212,7 @@ then oldline="`tail $resdir/console.log`" while : do - kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` + kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null` if kill -0 $qemu_pid > /dev/null 2>&1 then : @@ -227,7 +226,7 @@ then must_continue=yes fi last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`" - if test -z "last_ts" + if test -z "$last_ts" then last_ts=0 fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 72518580df23..78d18ab8e954 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -24,7 +24,9 @@ dur=$((30*60)) dryrun="" KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM PATH=${KVM}/bin:$PATH; export PATH -TORTURE_ALLOTED_CPUS="" +. 
functions.sh + +TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD @@ -40,8 +42,6 @@ cpus=0 ds=`date +%Y.%m.%d-%H:%M:%S` jitter="-1" -. functions.sh - usage () { echo "Usage: $scriptname optional arguments:" echo " --bootargs kernel-boot-arguments" @@ -93,6 +93,11 @@ do checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--' cpus=$2 TORTURE_ALLOTED_CPUS="$2" + max_cpus="`identify_qemu_vcpus`" + if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus" + then + TORTURE_ALLOTED_CPUS=$max_cpus + fi shift ;; --datestamp) @@ -198,9 +203,10 @@ fi CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG +defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`" if test -z "$configs" then - configs="`cat $CONFIGFRAG/CFLIST`" + configs=$defaultconfigs fi if test -z "$resdir" @@ -209,7 +215,7 @@ then fi # Create a file of test-name/#cpus pairs, sorted by decreasing #cpus. -touch $T/cfgcpu +configs_derep= for CF in $configs do case $CF in @@ -222,15 +228,21 @@ do CF1=$CF ;; esac + for ((cur_rep=0;cur_rep<$config_reps;cur_rep++)) + do + configs_derep="$configs_derep $CF1" + done +done +touch $T/cfgcpu +configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" +for CF1 in $configs_derep +do if test -f "$CONFIGFRAG/$CF1" then cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` - for ((cur_rep=0;cur_rep<$config_reps;cur_rep++)) - do - echo $CF1 $cpu_count >> $T/cfgcpu - done + echo $CF1 $cpu_count >> $T/cfgcpu else echo "The --configs file $CF1 does not exist, terminating." 
exit 1 diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 6fa9bd1ddc09..38e424d2392c 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -20,58 +20,9 @@ if [ -s "$D/initrd/init" ]; then exit 0 fi -T=${TMPDIR-/tmp}/mkinitrd.sh.$$ -trap 'rm -rf $T' 0 2 -mkdir $T - -cat > $T/init << '__EOF___' -#!/bin/sh -# Run in userspace a few milliseconds every second. This helps to -# exercise the NO_HZ_FULL portions of RCU. The 192 instances of "a" was -# empirically shown to give a nice multi-millisecond burst of user-mode -# execution on a 2GHz CPU, as desired. Modern CPUs will vary from a -# couple of milliseconds up to perhaps 100 milliseconds, which is an -# acceptable range. -# -# Why not calibrate an exact delay? Because within this initrd, we -# are restricted to Bourne-shell builtins, which as far as I know do not -# provide any means of obtaining a fine-grained timestamp. - -a4="a a a a" -a16="$a4 $a4 $a4 $a4" -a64="$a16 $a16 $a16 $a16" -a192="$a64 $a64 $a64" -while : -do - q= - for i in $a192 - do - q="$q $i" - done - sleep 1 -done -__EOF___ - -# Try using dracut to create initrd -if command -v dracut >/dev/null 2>&1 -then - echo Creating $D/initrd using dracut. - # Filesystem creation - dracut --force --no-hostonly --no-hostonly-cmdline --module "base" $T/initramfs.img - cd $D - mkdir -p initrd - cd initrd - zcat $T/initramfs.img | cpio -id - cp $T/init init - chmod +x init - echo Done creating $D/initrd using dracut - exit 0 -fi - -# No dracut, so create a C-language initrd/init program and statically -# link it. This results in a very small initrd, but might be a bit less -# future-proof than dracut. -echo "Could not find dracut, attempting C initrd" +# Create a C-language initrd/init infinite-loop program and statically +# link it. This results in a very small initrd. 
+echo "Creating a statically linked C-language initrd" cd $D mkdir -p initrd cd initrd diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index 28568b72a31b..ea4399020c6c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -1,8 +1,5 @@ CONFIG_SMP=y CONFIG_NR_CPUS=2 -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 35e639e39366..65daee4fbf5a 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 CONFIG_RCU_NOCB_CPU=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 5c3213cc3ad7..1c218944b1e9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -3,3 +3,4 @@ rcutree.gp_preinit_delay=12 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 +threadirqs diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 24c9f6012e35..f6d6a40c0576 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_FAST_NO_HZ=y CONFIG_RCU_TRACE=y -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=4 CONFIG_RCU_FANOUT_LEAF=3 
CONFIG_DEBUG_LOCK_ALLOC=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 05a4eec3f27b..bf4980d606b5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 CONFIG_RCU_NOCB_CPU=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index fb1c763c10c5..c810c5276a89 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -9,9 +9,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_FAST_NO_HZ=n CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 index 6710e749d9de..8523a7515cbf 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -8,9 +8,6 @@ CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL index 4d8eb5bfb6f6..5d546efa68e8 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL +++ b/tools/testing/selftests/rcutorture/configs/rcu/TRIVIAL @@ -6,9 +6,6 @@ CONFIG_PREEMPT=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n -CONFIG_HOTPLUG_CPU=n -CONFIG_SUSPEND=n -CONFIG_HIBERNATION=n 
CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index af6fca03602f..1b96d68473b8 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -6,7 +6,6 @@ Kconfig Parameters: CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not. CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one. -CONFIG_HOTPLUG_CPU -- Do half. (Every second.) CONFIG_HZ_PERIODIC -- Do one. CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement. diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index eec2663261f2..e8a657a5f48a 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -15,7 +15,7 @@ #include <errno.h> #include <stddef.h> -static inline pid_t gettid(void) +static inline pid_t rseq_gettid(void) { return syscall(__NR_gettid); } @@ -373,11 +373,12 @@ void *test_percpu_spinlock_thread(void *arg) rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) - printf_verbose("tid %d: count %lld\n", (int) gettid(), i); + printf_verbose("tid %d: count %lld\n", + (int) rseq_gettid(), i); #endif } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && thread_data->reg && rseq_unregister_current_thread()) abort(); @@ -454,11 +455,12 @@ void *test_percpu_inc_thread(void *arg) } while (rseq_unlikely(ret)); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10))) - printf_verbose("tid %d: count %lld\n", (int) gettid(), i); + printf_verbose("tid %d: count %lld\n", + (int) rseq_gettid(), i); #endif } 
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && thread_data->reg && rseq_unregister_current_thread()) abort(); @@ -605,7 +607,7 @@ void *test_percpu_list_thread(void *arg) } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); @@ -796,7 +798,7 @@ void *test_percpu_buffer_thread(void *arg) } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); @@ -1011,7 +1013,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg) } printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n", - (int) gettid(), nr_abort, signals_delivered); + (int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d40d60e7499e..3f63eb362b92 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -149,11 +149,13 @@ static inline void rseq_clear_rseq_cs(void) /* * rseq_prepare_unload() should be invoked by each thread executing a rseq * critical section at least once between their last critical section and - * library unload of the library defining the rseq critical section - * (struct rseq_cs). This also applies to use of rseq in code generated by - * JIT: rseq_prepare_unload() should be invoked at least once by each - * thread executing a rseq critical section before reclaim of the memory - * holding the struct rseq_cs. 
+ * library unload of the library defining the rseq critical section (struct + * rseq_cs) or the code referred to by the struct rseq_cs start_ip and + * post_commit_offset fields. This also applies to use of rseq in code + * generated by JIT: rseq_prepare_unload() should be invoked at least once by + * each thread executing a rseq critical section before reclaim of the memory + * holding the struct rseq_cs or reclaim of the code pointed to by struct + * rseq_cs start_ip and post_commit_offset fields. */ static inline void rseq_prepare_unload(void) { diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/rseq/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings new file mode 100644 index 000000000000..ba4d85f74cd6 --- /dev/null +++ b/tools/testing/selftests/rtc/settings @@ -0,0 +1 @@ +timeout=90 diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile index 98da7a504737..fa02c4d5ec13 100644 --- a/tools/testing/selftests/safesetid/Makefile +++ b/tools/testing/selftests/safesetid/Makefile @@ -1,8 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for mount selftests. 
-CFLAGS = -Wall -lcap -O2 +CFLAGS = -Wall -O2 +LDLIBS = -lcap -TEST_PROGS := run_tests.sh +TEST_PROGS := safesetid-test.sh TEST_GEN_FILES := safesetid-test include ../lib.mk diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c index 8f40c6ecdad1..0c4d50644c13 100644 --- a/tools/testing/selftests/safesetid/safesetid-test.c +++ b/tools/testing/selftests/safesetid/safesetid-test.c @@ -213,7 +213,8 @@ static void test_setuid(uid_t child_uid, bool expect_success) } if (cpid == 0) { /* Code executed by child */ - setuid(child_uid); + if (setuid(child_uid) < 0) + exit(EXIT_FAILURE); if (getuid() == child_uid) exit(EXIT_SUCCESS); else @@ -291,8 +292,10 @@ int main(int argc, char **argv) // First test to make sure we can write userns mappings from a user // that doesn't have any restrictions (as long as it has CAP_SETUID); - setuid(NO_POLICY_USER); - setgid(NO_POLICY_USER); + if (setuid(NO_POLICY_USER) < 0) + die("Error with set uid(%d)\n", NO_POLICY_USER); + if (setgid(NO_POLICY_USER) < 0) + die("Error with set gid(%d)\n", NO_POLICY_USER); // Take away all but setid caps drop_caps(true); @@ -306,8 +309,10 @@ int main(int argc, char **argv) die("test_userns failed when it should work\n"); } - setuid(RESTRICTED_PARENT); - setgid(RESTRICTED_PARENT); + if (setuid(RESTRICTED_PARENT) < 0) + die("Error with set uid(%d)\n", RESTRICTED_PARENT); + if (setgid(RESTRICTED_PARENT) < 0) + die("Error with set gid(%d)\n", RESTRICTED_PARENT); test_setuid(ROOT_USER, false); test_setuid(ALLOWED_CHILD1, true); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 6ef7f16c4cf5..ee1b727ede04 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -35,6 +35,7 @@ #include <stdbool.h> #include <string.h> #include <time.h> +#include <limits.h> #include <linux/elf.h> #include <sys/uio.h> #include <sys/utsname.h> @@ -43,6 
+44,7 @@ #include <sys/times.h> #include <sys/socket.h> #include <sys/ioctl.h> +#include <linux/kcmp.h> #include <unistd.h> #include <sys/syscall.h> @@ -112,6 +114,8 @@ struct seccomp_data { # define __NR_seccomp 383 # elif defined(__aarch64__) # define __NR_seccomp 277 +# elif defined(__riscv) +# define __NR_seccomp 277 # elif defined(__hppa__) # define __NR_seccomp 338 # elif defined(__powerpc__) @@ -199,6 +203,15 @@ struct seccomp_notif_sizes { }; #endif +#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY +#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 +#define PTRACE_EVENTMSG_SYSCALL_EXIT 2 +#endif + +#ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE +#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001 +#endif + #ifndef seccomp int seccomp(unsigned int op, unsigned int flags, void *args) { @@ -1582,6 +1595,10 @@ TEST_F(TRACE_poke, getpid_runs_normally) # define ARCH_REGS struct user_pt_regs # define SYSCALL_NUM regs[8] # define SYSCALL_RET regs[0] +#elif defined(__riscv) && __riscv_xlen == 64 +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM a7 +# define SYSCALL_RET a0 #elif defined(__hppa__) # define ARCH_REGS struct user_regs_struct # define SYSCALL_NUM gr[20] @@ -1671,7 +1688,7 @@ void change_syscall(struct __test_metadata *_metadata, EXPECT_EQ(0, ret) {} #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) + defined(__s390__) || defined(__hppa__) || defined(__riscv) { regs.SYSCALL_NUM = syscall; } @@ -3072,7 +3089,7 @@ static int user_trap_syscall(int nr, unsigned int flags) return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog); } -#define USER_NOTIF_MAGIC 116983961184613L +#define USER_NOTIF_MAGIC INT_MAX TEST(user_notification_basic) { pid_t pid; @@ -3141,7 +3158,18 @@ TEST(user_notification_basic) EXPECT_GT(poll(&pollfd, 1, -1), 0); EXPECT_EQ(pollfd.revents, POLLIN); - EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + /* Test that we can't pass garbage to the kernel. 
*/ + memset(&req, 0, sizeof(req)); + req.pid = -1; + errno = 0; + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); + + if (ret) { + req.pid = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + } pollfd.fd = listener; pollfd.events = POLLIN | POLLOUT; @@ -3261,6 +3289,7 @@ TEST(user_notification_signal) close(sk_pair[1]); + memset(&req, 0, sizeof(req)); EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); EXPECT_EQ(kill(pid, SIGUSR1), 0); @@ -3279,6 +3308,7 @@ TEST(user_notification_signal) EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); EXPECT_EQ(errno, ENOENT); + memset(&req, 0, sizeof(req)); EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); resp.id = req.id; @@ -3480,6 +3510,108 @@ TEST(seccomp_get_notif_sizes) EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); } +static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) +{ +#ifdef __NR_kcmp + return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); +#else + errno = ENOSYS; + return -1; +#endif +} + +TEST(user_notification_continue) +{ + pid_t pid; + long ret; + int status, listener; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + struct pollfd pollfd; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int dup_fd, pipe_fds[2]; + pid_t self; + + ret = pipe(pipe_fds); + if (ret < 0) + exit(1); + + dup_fd = dup(pipe_fds[0]); + if (dup_fd < 0) + exit(1); + + self = getpid(); + + ret = filecmp(self, self, pipe_fds[0], dup_fd); + if (ret) + exit(2); + + exit(0); + } + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLIN); + + 
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + pollfd.fd = listener; + pollfd.events = POLLIN | POLLOUT; + + EXPECT_GT(poll(&pollfd, 1, -1), 0); + EXPECT_EQ(pollfd.revents, POLLOUT); + + EXPECT_EQ(req.data.nr, __NR_dup); + + resp.id = req.id; + resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE; + + /* + * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other + * args be set to 0. + */ + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, EINVAL); + + resp.error = USER_NOTIF_MAGIC; + resp.val = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1); + EXPECT_EQ(errno, EINVAL); + + resp.error = 0; + resp.val = 0; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { + if (errno == EINVAL) + XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); + } + +skip: + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)) { + if (WEXITSTATUS(status) == 2) { + XFAIL(return, "Kernel does not support kcmp() syscall"); + return; + } + } +} + /* * TODO: * - add microbenchmarks diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c index 2ad45b944355..2980b1a63366 100644 --- a/tools/testing/selftests/size/get_size.c +++ b/tools/testing/selftests/size/get_size.c @@ -11,23 +11,35 @@ * own execution. It also attempts to have as few dependencies * on kernel features as possible. * - * It should be statically linked, with startup libs avoided. - * It uses no library calls, and only the following 3 syscalls: + * It should be statically linked, with startup libs avoided. 
It uses + * no library calls except the syscall() function for the following 3 + * syscalls: * sysinfo(), write(), and _exit() * * For output, it avoids printf (which in some C libraries * has large external dependencies) by implementing it's own * number output and print routines, and using __builtin_strlen() + * + * The test may crash if any of the above syscalls fails because in some + * libc implementations (e.g. the GNU C Library) errno is saved in + * thread-local storage, which does not get initialized due to avoiding + * startup libs. */ #include <sys/sysinfo.h> #include <unistd.h> +#include <sys/syscall.h> #define STDOUT_FILENO 1 static int print(const char *s) { - return write(STDOUT_FILENO, s, __builtin_strlen(s)); + size_t len = 0; + + while (s[len] != '\0') + len++; + + return syscall(SYS_write, STDOUT_FILENO, s, len); } static inline char *num_to_str(unsigned long num, char *buf, int len) @@ -79,12 +91,12 @@ void _start(void) print("TAP version 13\n"); print("# Testing system size.\n"); - ccode = sysinfo(&info); + ccode = syscall(SYS_sysinfo, &info); if (ccode < 0) { print("not ok 1"); print(test_name); print(" ---\n reason: \"could not get sysinfo\"\n ...\n"); - _exit(ccode); + syscall(SYS_exit, ccode); } print("ok 1"); print(test_name); @@ -100,5 +112,5 @@ void _start(void) print(" ...\n"); print("1..1\n"); - _exit(0); + syscall(SYS_exit, 0); } diff --git a/tools/testing/selftests/sync/sync.c b/tools/testing/selftests/sync/sync.c index f3d599f249b9..7741c0518d18 100644 --- a/tools/testing/selftests/sync/sync.c +++ b/tools/testing/selftests/sync/sync.c @@ -109,7 +109,7 @@ static struct sync_file_info *sync_file_info(int fd) return NULL; } - info->sync_fence_info = (uint64_t)fence_info; + info->sync_fence_info = (uint64_t)(unsigned long)fence_info; err = ioctl(fd, SYNC_IOC_FILE_INFO, info); if (err < 0) { @@ -124,7 +124,7 @@ static struct sync_file_info *sync_file_info(int fd) static void sync_file_info_free(struct sync_file_info *info) { - free((void 
*)info->sync_fence_info); + free((void *)(unsigned long)info->sync_fence_info); free(info); } @@ -152,7 +152,7 @@ int sync_fence_count_with_status(int fd, int status) if (!info) return -1; - fence_info = (struct sync_fence_info *)info->sync_fence_info; + fence_info = (struct sync_fence_info *)(unsigned long)info->sync_fence_info; for (i = 0 ; i < info->num_fences ; i++) { if (fence_info[i].status == status) count++; diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README index 22e5da9008fd..b0954c873e2f 100644 --- a/tools/testing/selftests/tc-testing/README +++ b/tools/testing/selftests/tc-testing/README @@ -128,7 +128,9 @@ optional arguments: -v, --verbose Show the commands that are being run -N, --notap Suppress tap results for command under test -d DEVICE, --device DEVICE - Execute the test case in flower category + Execute test cases that use a physical device, where + DEVICE is its name. (If not defined, tests that require + a physical device will be skipped) -P, --pause Pause execution just before post-suite stage selection: diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 7c551968d184..477bc61b374a 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -1,3 +1,12 @@ +# +# Core Netfilter Configuration +# +CONFIG_NF_CONNTRACK=m +CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_ZONES=y +CONFIG_NF_CONNTRACK_LABELS=y +CONFIG_NF_NAT=m + CONFIG_NET_SCHED=y # @@ -42,6 +51,7 @@ CONFIG_NET_ACT_CTINFO=m CONFIG_NET_ACT_SKBMOD=m CONFIG_NET_ACT_IFE=m CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_IFE_SKBMARK=m CONFIG_NET_IFE_SKBPRIO=m diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py index e98c36750fae..d34fe06268d2 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py +++ 
b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py @@ -54,7 +54,7 @@ class SubPlugin(TdcPlugin): shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=ENVIR) + env=os.environ.copy()) (rawout, serr) = proc.communicate() if proc.returncode != 0 and len(serr) > 0: diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index affa7f2d9670..9539cffa9e5e 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -64,7 +64,7 @@ class SubPlugin(TdcPlugin): cmdlist.insert(0, self.args.NAMES['NS']) cmdlist.insert(0, 'exec') cmdlist.insert(0, 'netns') - cmdlist.insert(0, 'ip') + cmdlist.insert(0, self.args.NAMES['IP']) else: pass @@ -78,16 +78,16 @@ class SubPlugin(TdcPlugin): return command def _ports_create(self): - cmd = 'ip link add $DEV0 type veth peer name $DEV1' + cmd = '$IP link add $DEV0 type veth peer name $DEV1' self._exec_cmd('pre', cmd) - cmd = 'ip link set $DEV0 up' + cmd = '$IP link set $DEV0 up' self._exec_cmd('pre', cmd) if not self.args.namespace: - cmd = 'ip link set $DEV1 up' + cmd = '$IP link set $DEV1 up' self._exec_cmd('pre', cmd) def _ports_destroy(self): - cmd = 'ip link del $DEV0' + cmd = '$IP link del $DEV0' self._exec_cmd('post', cmd) def _ns_create(self): @@ -97,16 +97,16 @@ class SubPlugin(TdcPlugin): ''' self._ports_create() if self.args.namespace: - cmd = 'ip netns add {}'.format(self.args.NAMES['NS']) + cmd = '$IP netns add {}'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) - cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS']) + cmd = '$IP link set $DEV1 netns {}'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) - cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS']) + cmd = '$IP -n {} link set $DEV1 up'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) if self.args.device: - cmd = 'ip link set $DEV2 netns 
{}'.format(self.args.NAMES['NS']) + cmd = '$IP link set $DEV2 netns {}'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) - cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS']) + cmd = '$IP -n {} link set $DEV2 up'.format(self.args.NAMES['NS']) self._exec_cmd('pre', cmd) def _ns_destroy(self): @@ -115,7 +115,7 @@ class SubPlugin(TdcPlugin): devices as well) ''' if self.args.namespace: - cmd = 'ip netns delete {}'.format(self.args.NAMES['NS']) + cmd = '$IP netns delete {}'.format(self.args.NAMES['NS']) self._exec_cmd('post', cmd) def _exec_cmd(self, stage, command): diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index ddabb2fbb7c7..88ec134872e4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -525,5 +525,29 @@ "teardown": [ "$TC actions flush action csum" ] + }, + { + "id": "eaf0", + "name": "Add csum iph action with no_percpu flag", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action csum iph no_percpu", + "expExitCode": "0", + "verifyCmd": "$TC actions list action csum", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*no_percpu", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json index 62b82fe10c89..4202e95e27b9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json @@ -24,6 +24,30 @@ ] }, { + "id": "e38c", + "name": "Add simple ct action with cookie", + "category": [ + "actions", + "ct" + ], + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC 
actions add action ct index 42 cookie deadbeef", + "expExitCode": "0", + "verifyCmd": "$TC actions list action ct", + "matchPattern": "action order [0-9]*: ct zone 0 pipe.*index 42 ref.*cookie deadbeef", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ct" + ] + }, + { "id": "9f20", "name": "Add ct clear action", "category": [ @@ -48,6 +72,30 @@ ] }, { + "id": "0bc1", + "name": "Add ct clear action with cookie of max length", + "category": [ + "actions", + "ct" + ], + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ct clear index 42 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action ct", + "matchPattern": "action order [0-9]*: ct clear pipe.*index 42 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ct" + ] + }, + { "id": "5bea", "name": "Try ct with zone", "category": [ @@ -310,5 +358,53 @@ "teardown": [ "$TC actions flush action ct" ] + }, + { + "id": "2faa", + "name": "Try ct with mark + mask and cookie", + "category": [ + "actions", + "ct" + ], + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ct mark 0x42/0xf0 index 42 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action ct", + "matchPattern": "action order [0-9]*: ct mark 66/0xf0 zone 0 pipe.*index 42 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ct" + ] + }, + { + "id": "3991", + "name": "Add simple ct action with no_percpu flag", + "category": [ + "actions", + "ct" + ], + "setup": [ + [ + "$TC actions flush action ct", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action ct no_percpu", + "expExitCode": "0", + "verifyCmd": "$TC actions list action ct", + "matchPattern": "action order [0-9]*: ct zone 0 pipe.*no_percpu", 
+ "matchCount": "1", + "teardown": [ + "$TC actions flush action ct" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index 814b7a8a478b..b24494c6f546 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -585,5 +585,29 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "95ad", + "name": "Add gact pass action with no_percpu flag", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pass no_percpu", + "expExitCode": "0", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*no_percpu", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index 2232b21e2510..12a2fe0e1472 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -553,5 +553,29 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "31e3", + "name": "Add mirred mirror to egress action with no_percpu flag", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror dev lo no_percpu", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mirred", + "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*no_percpu", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json 
b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json index e31a080edc49..866f0efd0859 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json @@ -168,6 +168,54 @@ ] }, { + "id": "09d2", + "name": "Add mpls dec_ttl action with opcode and cookie", + "category": [ + "actions", + "mpls" + ], + "setup": [ + [ + "$TC actions flush action mpls", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mpls dec_ttl pipe index 8 cookie aabbccddeeff", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mpls", + "matchPattern": "action order [0-9]+: mpls.*dec_ttl pipe.*index 8 ref.*cookie aabbccddeeff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mpls" + ] + }, + { + "id": "c170", + "name": "Add mpls dec_ttl action with opcode and cookie of max length", + "category": [ + "actions", + "mpls" + ], + "setup": [ + [ + "$TC actions flush action mpls", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mpls dec_ttl continue index 8 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mpls", + "matchPattern": "action order [0-9]+: mpls.*dec_ttl continue.*index 8 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mpls" + ] + }, + { "id": "9118", "name": "Add mpls dec_ttl action with invalid opcode", "category": [ @@ -302,6 +350,30 @@ ] }, { + "id": "91fb", + "name": "Add mpls pop action with ip proto and cookie", + "category": [ + "actions", + "mpls" + ], + "setup": [ + [ + "$TC actions flush action mpls", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mpls pop protocol ipv4 cookie 12345678", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mpls", + "matchPattern": "action order [0-9]+: mpls.*pop.*protocol.*ip.*pipe.*ref 1.*cookie 12345678", + "matchCount": "1", + "teardown": [ + "$TC 
actions flush action mpls" + ] + }, + { "id": "92fe", "name": "Add mpls pop action with mpls proto", "category": [ @@ -508,6 +580,30 @@ ] }, { + "id": "7c34", + "name": "Add mpls push action with label, tc ttl and cookie of max length", + "category": [ + "actions", + "mpls" + ], + "setup": [ + [ + "$TC actions flush action mpls", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mpls push label 20 tc 3 ttl 128 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mpls", + "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*20.*tc.*3.*ttl.*128.*pipe.*ref 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mpls" + ] + }, + { "id": "16eb", "name": "Add mpls push action with label and bos", "category": [ @@ -828,6 +924,30 @@ ] }, { + "id": "77c1", + "name": "Add mpls mod action with mpls ttl and cookie", + "category": [ + "actions", + "mpls" + ], + "setup": [ + [ + "$TC actions flush action mpls", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action mpls mod ttl 128 cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action mpls", + "matchPattern": "action order [0-9]+: mpls.*modify.*ttl.*128.*pipe.*ref 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mpls" + ] + }, + { "id": "b80f", "name": "Add mpls mod action with mpls max ttl", "category": [ @@ -1037,6 +1157,31 @@ ] }, { + "id": "95a9", + "name": "Replace existing mpls push action with new label, tc, ttl and cookie", + "category": [ + "actions", + "mpls" + ], + "setup": [ + [ + "$TC actions flush action mpls", + 0, + 1, + 255 + ], + "$TC actions add action mpls push label 20 tc 3 ttl 128 index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2" + ], + "cmdUnderTest": "$TC actions replace action mpls push label 30 tc 2 ttl 125 pipe index 1 cookie aa11bb22cc33", 
+ "expExitCode": "0", + "verifyCmd": "$TC actions get action mpls index 1", + "matchPattern": "action order [0-9]+: mpls.*push.*protocol.*mpls_uc.*label.*30 tc 2 ttl 125 pipe.*index 1.*cookie aa11bb22cc33", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mpls" + ] + }, + { "id": "6cce", "name": "Delete mpls pop action", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json index 0d319f1d01db..f8ea6f5fa8e9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -349,6 +349,281 @@ ] }, { + "id": "1762", + "name": "Add pedit action with RAW_OP offset u8 clear value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 clear", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "bcee", + "name": "Add pedit action with RAW_OP offset u8 retain value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 set 0x11 retain 0x0f", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 01000000 mask f0ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "e89f", + "name": "Add pedit action with RAW_OP offset u16 retain value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 
1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u16 set 0x1122 retain 0xff00", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 11000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "c282", + "name": "Add pedit action with RAW_OP offset u32 clear value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u32 clear", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "c422", + "name": "Add pedit action with RAW_OP offset u16 invert value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u16 invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 12: val ffff0000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "d3d3", + "name": "Add pedit action with RAW_OP offset u32 invert value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 12 u32 invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 12: val ffffffff mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + 
}, + { + "id": "57e5", + "name": "Add pedit action with RAW_OP offset u8 preserve value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u8 preserve", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "99e0", + "name": "Add pedit action with RAW_OP offset u16 preserve value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u16 preserve", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "1892", + "name": "Add pedit action with RAW_OP offset u32 preserve value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset 0 u32 preserve", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 1.*key #0.*at 0: val 00000000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "4b60", + "name": "Add pedit action with RAW_OP negative offset u16/u32 set value", + "category": [ + "actions", + "pedit", + "raw_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge offset -14 u16 set 0x0000 munge offset -12 u32 set 0x00000100 
munge offset -8 u32 set 0x0aaf0100 munge offset -4 u32 set 0x0008eb06 pipe", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+:.*pedit.*keys 4.*key #0.*at -16: val 00000000 mask ffff0000.*key #1.*at -12: val 00000100 mask 00000000.*key #2.*at -8: val 0aaf0100 mask 00000000.*key #3.*at -4: val 0008eb06 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "a5a7", + "name": "Add pedit action with LAYERED_OP eth set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src set 11:22:33:44:55:66", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+4: val 00001122 mask ffff0000.*key #1 at eth\\+8: val 33445566 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "86d4", "name": "Add pedit action with LAYERED_OP eth set src & dst", "category": [ @@ -374,6 +649,31 @@ ] }, { + "id": "f8a9", + "name": "Add pedit action with LAYERED_OP eth set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set 11:22:33:44:55:66", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+0: val 11223344 mask 00000000.*key #1 at eth\\+4: val 55660000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "c715", "name": "Add pedit action with LAYERED_OP eth set src (INVALID)", "category": [ @@ -399,6 +699,31 @@ ] }, { + "id": "8131", + "name": "Add pedit action with LAYERED_OP 
eth set dst (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth dst set %e:11:m2:33:x4:-5", + "expExitCode": "255", + "verifyCmd": "/bin/true", + "matchPattern": " ", + "matchCount": "0", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "ba22", "name": "Add pedit action with LAYERED_OP eth type set/clear sequence", "category": [ @@ -424,6 +749,179 @@ ] }, { + "id": "dec4", + "name": "Add pedit action with LAYERED_OP eth set type (INVALID)", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type set 0xabcdef", + "expExitCode": "255", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth+12: val ", + "matchCount": "0", + "teardown": [] + }, + { + "id": "ab06", + "name": "Add pedit action with LAYERED_OP eth add type", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type add 0x1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth\\+12: add 00010000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "918d", + "name": "Add pedit action with LAYERED_OP eth invert src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth src invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action 
order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+4: val 0000ff00 mask ffff0000.*key #1 at eth\\+8: val 00000000 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "a8d4", + "name": "Add pedit action with LAYERED_OP eth invert dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth dst invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 2.*key #0 at eth\\+0: val ff000000 mask 00000000.*key #1 at eth\\+4: val 00000000 mask 0000ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "ee13", + "name": "Add pedit action with LAYERED_OP eth invert type", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge eth type invert", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at eth\\+12: val ffff0000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "7588", + "name": "Add pedit action with LAYERED_OP ip set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip src set 1.1.1.1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 12: val 01010101 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "0fa7", + "name": "Add pedit action with LAYERED_OP ip 
set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip dst set 2.2.2.2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at 16: val 02020202 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "5810", "name": "Add pedit action with LAYERED_OP ip set src & dst", "category": [ @@ -599,6 +1097,206 @@ ] }, { + "id": "cc8a", + "name": "Add pedit action with LAYERED_OP ip set tos", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip tos set 0x4 continue", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action continue keys 1.*key #0 at 0: val 00040000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "7a17", + "name": "Add pedit action with LAYERED_OP ip set precedence", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip precedence set 3 jump 2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action jump 2 keys 1.*key #0 at 0: val 00030000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "c3b6", + "name": "Add pedit action with LAYERED_OP ip add tos", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge 
ip tos add 0x1 pass", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*key #0 at ipv4\\+0: add 00010000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "43d3", + "name": "Add pedit action with LAYERED_OP ip add precedence", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip precedence add 0x1 pipe", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pipe keys 1.*key #0 at ipv4\\+0: add 00010000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "438e", + "name": "Add pedit action with LAYERED_OP ip clear tos", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip tos clear continue", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action continue keys 1.*key #0 at 0: val 00000000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "6b1b", + "name": "Add pedit action with LAYERED_OP ip clear precedence", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip precedence clear jump 2", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action jump 2 keys 1.*key #0 at 0: val 00000000 mask ff00ffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": 
"824a", + "name": "Add pedit action with LAYERED_OP ip invert tos", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip tos invert pipe", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pipe keys 1.*key #0 at 0: val 00ff0000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "106f", + "name": "Add pedit action with LAYERED_OP ip invert precedence", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit munge ip precedence invert reclassify", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action reclassify keys 1.*key #0 at 0: val 00ff0000 mask ffffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "6829", "name": "Add pedit action with LAYERED_OP beyond ip set dport & sport", "category": [ @@ -674,6 +1372,56 @@ ] }, { + "id": "815c", + "name": "Add pedit action with LAYERED_OP ip6 set src", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 src set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+8: val 20010db8 mask 00000000.*key #1 at ipv6\\+12: val 0000f101 mask 00000000.*key #2 at ipv6\\+16: val 00000000 mask 00000000.*key #3 at ipv6\\+20: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "4dae", + "name": 
"Add pedit action with LAYERED_OP ip6 set dst", + "category": [ + "actions", + "pedit", + "layered_op" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip6 dst set 2001:0db8:0:f101::1", + "expExitCode": "0", + "verifyCmd": "$TC actions list action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 4.*key #0 at ipv6\\+24: val 20010db8 mask 00000000.*key #1 at ipv6\\+28: val 0000f101 mask 00000000.*key #2 at ipv6\\+32: val 00000000 mask 00000000.*key #3 at ipv6\\+36: val 00000001 mask 00000000", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { "id": "fc1f", "name": "Add pedit action with LAYERED_OP ip6 set src & dst", "category": [ @@ -950,5 +1698,4 @@ "$TC actions flush action pedit" ] } - ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index 28453a445fdb..fbeb9197697d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -909,5 +909,29 @@ "teardown": [ "$TC actions flush action tunnel_key" ] + }, + { + "id": "0cd2", + "name": "Add tunnel_key set action with no_percpu flag", + "category": [ + "actions", + "tunnel_key" + ], + "setup": [ + [ + "$TC actions flush action tunnel_key", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 no_percpu", + "expExitCode": "0", + "verifyCmd": "$TC actions list action tunnel_key", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*no_percpu", + "matchCount": "1", + "teardown": [ + "$TC actions flush action tunnel_key" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json 
b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 6503b1ce091f..41d783254b08 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -807,5 +807,29 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "1a3d", + "name": "Add vlan pop action with no_percpu flag", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action vlan pop no_percpu", + "expExitCode": "0", + "verifyCmd": "$TC actions list action vlan", + "matchPattern": "action order [0-9]+: vlan.*pop.*no_percpu", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json new file mode 100644 index 000000000000..98a20faf3198 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -0,0 +1,376 @@ +[ + { + "id": "7a92", + "name": "Add basic filter with cmp ematch u8/link layer and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "2e8a", + "name": "Add basic filter with cmp ematch u8/link layer with trans flag and default action", + "category": [ + "filter", + "basic" + ], + "plugins": 
{ + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff trans gt 10)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 0 mask 0xff trans gt 10\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "4d9f", + "name": "Add basic filter with cmp ematch u16/link layer and a single action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u16 at 0 layer 0 mask 0xff00 lt 3)' action pass", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u16 at 0 layer 0 mask 0xff00 lt 3\\).*action.*gact action pass", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "4943", + "name": "Add basic filter with cmp ematch u32/link layer and multiple actions", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u32 at 4 layer link mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u32 at 4 layer 0 mask 0xff00ff00 eq
3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7559", + "name": "Add basic filter with cmp ematch u8/network layer and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 basic match 'cmp(u8 at 0 layer 1 mask 0xff gt 10)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xab prio 11 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 11 basic.*handle 0xab flowid 1:1.*cmp\\(u8 at 0 layer 1 mask 0xff gt 10\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "aff4", + "name": "Add basic filter with cmp ematch u8/network layer with trans flag and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 basic match 'cmp(u8 at 0 layer 1 mask 0xff trans gt 10)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xab prio 11 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 11 basic.*handle 0xab flowid 1:1.*cmp\\(u8 at 0 layer 1 mask 0xff trans gt 10\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "c732", + "name": "Add basic filter with cmp ematch u16/network layer and a single action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x100 protocol ip prio 100 basic match 'cmp(u16 at 0 
layer network mask 0xff00 lt 3)' action pass", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x100 prio 100 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 100 basic.*handle 0x100.*cmp\\(u16 at 0 layer 1 mask 0xff00 lt 3\\).*action.*gact action pass", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "32d8", + "name": "Add basic filter with cmp ematch u32/network layer and multiple actions", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x112233 protocol ip prio 7 basic match 'cmp(u32 at 4 layer network mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0x112233 prio 7 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 7 basic.*handle 0x112233.*cmp\\(u32 at 4 layer 1 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "b99c", + "name": "Add basic filter with cmp ematch u8/transport layer and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer transport mask 0xff gt 10)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 2 mask 0xff gt 10\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "0752", + "name":
"Add basic filter with cmp ematch u8/transport layer with trans flag and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer transport mask 0xff trans gt 10)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*cmp\\(u8 at 0 layer 2 mask 0xff trans gt 10\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "7e07", + "name": "Add basic filter with cmp ematch u16/transport layer and a single action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u16 at 0 layer 2 mask 0xff00 lt 3)' action pass", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u16 at 0 layer 2 mask 0xff00 lt 3\\).*action.*gact action pass", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "62d7", + "name": "Add basic filter with cmp ematch u32/transport layer and multiple actions", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u32 at 4 layer transport mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle
1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u32 at 4 layer 2 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "304b", + "name": "Add basic filter with NOT cmp ematch rule and default action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'not cmp(u8 at 0 layer link mask 0xff eq 3)' classid 1:1", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*NOT cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\)", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "8ecb", + "name": "Add basic filter with two ANDed cmp ematch rules and single action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "b1ad", + "name": "Add basic filter with two ORed cmp ematch rules and single action", + "category": [ + "filter", + "basic" + ], + 
"plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) or cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*OR cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "4600", + "name": "Add basic filter with two ANDed cmp ematch rules and one ORed ematch rule and single action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "bc59", + "name": "Add basic filter with two ANDed cmp ematch rules and one NOT ORed ematch rule and single action", + "category": [ + "filter", + "basic" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic 
match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or not cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic", + "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR NOT cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json b/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json index 9002714b1851..c2a433a4737e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json @@ -12,7 +12,7 @@ "$TC qdisc add dev $DEV2 ingress", "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 add" ], - "cmdUnderTest": "find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/add* -print | xargs -n 1 -P 10 $TC -b\"", "expExitCode": "0", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -37,7 +37,7 @@ "$TC -b $BATCH_DIR/add_0", "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 del" ], - "cmdUnderTest": "find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -b\"", "expExitCode": "0", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -62,7 +62,7 @@ "$TC -b $BATCH_DIR/add_0", "./tdc_multibatch.py $DEV2 $BATCH_DIR 100000 10 replace" ], - "cmdUnderTest": "find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* 
-print | xargs -n 1 -P 10 $TC -b\"", "expExitCode": "0", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -87,7 +87,7 @@ "$TC -b $BATCH_DIR/add_0", "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 replace" ], - "cmdUnderTest": "find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/replace* -print | xargs -n 1 -P 10 $TC -b\"", "expExitCode": "0", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -112,7 +112,7 @@ "$TC -b $BATCH_DIR/add_0", "./tdc_multibatch.py -d $DEV2 $BATCH_DIR 100000 10 del" ], - "cmdUnderTest": "find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/del* -print | xargs -n 1 -P 10 $TC -f -b\"", "expExitCode": "123", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -134,11 +134,11 @@ "/bin/mkdir $BATCH_DIR", "$TC qdisc add dev $DEV2 ingress", "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 5 add", - "find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b", + "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b\"", "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 add", "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" ], - "cmdUnderTest": "find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", "expExitCode": "0", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", @@ -160,11 +160,11 @@ "/bin/mkdir $BATCH_DIR", "$TC qdisc add dev $DEV2 ingress", "./tdc_multibatch.py -x init_ $DEV2 $BATCH_DIR 100000 10 add", - "find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC -b", + "bash -c \"find $BATCH_DIR/init_* -print | xargs -n 1 -P 5 $TC 
-b\"", "./tdc_multibatch.py -x par_ -a 500001 -m 5 $DEV2 $BATCH_DIR 100000 5 replace", "./tdc_multibatch.py -x par_ $DEV2 $BATCH_DIR 100000 5 del" ], - "cmdUnderTest": "find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b", + "cmdUnderTest": "bash -c \"find $BATCH_DIR/par_* -print | xargs -n 1 -P 10 $TC -b\"", "expExitCode": "0", "verifyCmd": "$TC -s filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower chain 0 handle", diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json new file mode 100644 index 000000000000..51799874a972 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json @@ -0,0 +1,391 @@ +[ + { + "id": "f62b", + "name": "Add ingress matchall filter for protocol ipv4 and action PASS", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7f09", + "name": "Add egress matchall filter for protocol ipv4 and action PASS", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol ip matchall action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 1 protocol ip matchall", + "matchPattern": "^filter parent 1: protocol 
ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: prio", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "0596", + "name": "Add ingress matchall filter for protocol ipv6 and action DROP", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall action drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv6 matchall", + "matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "41df", + "name": "Add egress matchall filter for protocol ipv6 and action DROP", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol ipv6 matchall action drop", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 1 protocol ipv6 matchall", + "matchPattern": "^filter parent 1: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: prio", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e1da", + "name": "Add ingress matchall filter for protocol ipv4 and action PASS with priority at 16-bit maximum", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 
0x1 prio 65535 protocol ipv4 matchall action pass", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 65535 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "3de5", + "name": "Add egress matchall filter for protocol ipv4 and action PASS with priority at 16-bit maximum", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 65535 protocol ipv4 matchall action pass", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 65535 protocol ipv4 matchall", + "matchPattern": "^filter parent 1: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: prio", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "72d7", + "name": "Add ingress matchall filter for protocol ipv4 and action PASS with priority exceeding 16-bit maximum", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 655355 protocol ipv4 matchall action pass", + "expExitCode": "255", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 655355 protocol ipv4 matchall", + "matchPattern": "^filter parent ffff: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "41d3", + "name": "Add 
egress matchall filter for protocol ipv4 and action PASS with priority exceeding 16-bit maximum", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 655355 protocol ipv4 matchall action pass", + "expExitCode": "255", + "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 1 prio 655355 protocol ipv4 matchall", + "matchPattern": "^filter parent 1: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: prio", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "f755", + "name": "Add ingress matchall filter for all protocols and action CONTINUE with handle at 32-bit maximum", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0xffffffff prio 1 protocol all matchall action continue", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0xffffffff prio 1 protocol all matchall", + "matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2c33", + "name": "Add egress matchall filter for all protocols and action CONTINUE with handle at 32-bit maximum", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0xffffffff prio 1 protocol all matchall action continue", + "expExitCode": "0", + "verifyCmd": "$TC filter get 
dev $DUMMY parent 1: handle 0xffffffff prio 1 protocol all matchall", + "matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: prio", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "0e4a", + "name": "Add ingress matchall filter for all protocols and action RECLASSIFY with skip_hw flag", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall", + "matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7f60", + "name": "Add egress matchall filter for all protocols and action RECLASSIFY with skip_hw flag", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent 1: handle 0x1 prio 1 protocol all matchall", + "matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: prio", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "8bd2", + "name": "Add ingress matchall filter for protocol ipv6 and action PASS 
with classid", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:1 action pass", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall", + "matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:1.*gact action pass.*ref 1 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2a4a", + "name": "Add ingress matchall filter for protocol ipv6 and action PASS with invalid classid", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 6789defg action pass", + "expExitCode": "1", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall", + "matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 6789defg.*gact action pass.*ref 1 bind 1", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "eaf8", + "name": "Delete single ingress matchall filter", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:2 action pass" + ], + "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall", + 
"matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:2.*gact action pass.*ref 1 bind 1", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "76ad", + "name": "Delete all ingress matchall filters", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x3 prio 3 protocol all matchall classid 1:4 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x4 prio 4 protocol all matchall classid 1:5 action pass" + ], + "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff:", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DUMMY parent ffff:", + "matchPattern": "^filter protocol all pref.*matchall.*handle.*flowid.*gact action pass", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "1eb9", + "name": "Delete single ingress matchall filter out of multiple", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x3 prio 3 protocol all matchall classid 1:4 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x4 prio 4 protocol all matchall classid 1:5 action pass" + ], + "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff: protocol all handle 0x2 
prio 2 matchall", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DUMMY parent ffff:", + "matchPattern": "^filter protocol all pref 2 matchall.*handle 0x2 flowid 1:2.*gact action pass", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "6d63", + "name": "Delete ingress matchall filter by chain ID", + "category": [ + "filter", + "matchall" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all chain 1 matchall classid 1:1 action pass", + "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 chain 2 matchall classid 1:3 action continue" + ], + "cmdUnderTest": "$TC filter del dev $DUMMY parent ffff: chain 2", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DUMMY parent ffff:", + "matchPattern": "^filter protocol all pref 1 matchall chain 1 handle 0x1 flowid 1:1.*gact action pass", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index 0f89cd50a94b..8877f7b2b809 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -1,27 +1,5 @@ [ { - "id": "e9a3", - "name": "Add u32 with source match", - "category": [ - "filter", - "u32" - ], - "plugins": { - "requires": "nsPlugin" - }, - "setup": [ - "$TC qdisc add dev $DEV1 ingress" - ], - "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok", - "expExitCode": "0", - "verifyCmd": "$TC filter show dev $DEV1 parent ffff:", - "matchPattern": "match 7f000001/ffffffff at 12", - "matchCount": "1", - "teardown": [ - 
"$TC qdisc del dev $DEV1 ingress" - ] - }, - { "id": "2638", "name": "Add matchall and try to get it", "category": [ diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json new file mode 100644 index 000000000000..e09d3c0e307f --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -0,0 +1,205 @@ +[ + { + "id": "afa9", + "name": "Add u32 with source match", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol ip pref 1 u32 chain (0[ ]+$|0 fh 800: ht divisor 1|0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1.*match 7f000001/ffffffff at 12)", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "6aa7", + "name": "Add/Replace u32 with source match and invalid indev", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.1/32 indev notexist20 flowid 1:1 action ok", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol ip pref 1 u32 chain 0", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "bc4d", + "name": "Replace valid u32 with source match and invalid indev", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.3/32 flowid 1:3 action ok" + ], + "cmdUnderTest": 
"$TC filter replace dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.2/32 indev notexist20 flowid 1:2 action ok", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol ip pref 1 u32 chain (0[ ]+$|0 fh 800: ht divisor 1|0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:3.*match 7f000003/ffffffff at 12)", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "648b", + "name": "Add u32 with custom hash table", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 99 handle 42: u32 divisor 256", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "pref 99 u32 chain (0[ ]+$|0 fh 42: ht divisor 256|0 fh 800: ht divisor 1)", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "6658", + "name": "Add/Replace u32 with custom hash table and invalid handle", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 ingress prio 99 handle 42:42 u32 divisor 256", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "pref 99 u32 chain 0", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "9d0a", + "name": "Replace valid u32 with custom hash table and invalid handle", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress prio 99 handle 42: u32 divisor 256" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 ingress prio 99 handle 42:42 u32 divisor 128", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + 
"matchPattern": "pref 99 u32 chain (0[ ]+$|0 fh 42: ht divisor 256|0 fh 800: ht divisor 1)", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "1644", + "name": "Add u32 filter that links to a custom hash table", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress prio 99 handle 43: u32 divisor 256" + ], + "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 98 u32 link 43: hashkey mask 0x0000ff00 at 12 match ip src 192.168.0.0/16", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol ip pref 98 u32 chain (0[ ]+$|0 fh 801: ht divisor 1|0 fh 801::800 order 2048 key ht 801 bkt 0 link 43:.*match c0a80000/ffff0000 at 12.*hash mask 0000ff00 at 12)", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "74c2", + "name": "Add/Replace u32 filter with invalid hash table id", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 20 u32 ht 47:47 action drop", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol ip pref 20 u32 chain 0", + "matchCount": "0", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + }, + { + "id": "1fe6", + "name": "Replace valid u32 filter with invalid hash table id", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 99 handle 43: u32 divisor 1", + "$TC filter add dev $DEV1 ingress protocol ip prio 98 u32 ht 43: match tcp src 22 FFFF classid 1:3" + ], + "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 98 
u32 ht 43:1 match tcp src 23 FFFF classid 1:4", + "expExitCode": "2", + "verifyCmd": "$TC filter show dev $DEV1 ingress", + "matchPattern": "filter protocol ip pref 99 u32 chain (0[ ]+$|0 fh (43|800): ht divisor 1|0 fh 43::800 order 2048 key ht 43 bkt 0 flowid 1:3.*match 00160000/ffff0000 at nexthdr\\+0)", + "matchCount": "4", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json new file mode 100644 index 000000000000..180593010675 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json @@ -0,0 +1,940 @@ +[ + { + "id": "e90e", + "name": "Add ETS qdisc using bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 2", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "b059", + "name": "Add ETS qdisc using quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 900 800 700", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e8e7", + "name": "Add ETS qdisc using strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev 
$DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 3", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "233c", + "name": "Add ETS qdisc using bands + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 quanta 1000 900 800 700", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "3d35", + "name": "Add ETS qdisc using bands + strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 3", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 3 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7f3b", + "name": "Add ETS qdisc using strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3 quanta 1500 750", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 5 strict 3 quanta 1500 750 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "4593", + "name": "Add ETS qdisc using strict 0 + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], 
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 0 quanta 1500 750", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 2 quanta 1500 750 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "8938", + "name": "Add ETS qdisc using bands + strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 strict 3 quanta 1500 750", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 5 .*strict 3 quanta 1500 750 priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "0782", + "name": "Add ETS qdisc with more bands than quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 2 .*quanta 1000 [1-9][0-9]* priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "501b", + "name": "Add ETS qdisc with more bands than strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta ([1-9][0-9]* ){2}priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev 
$DUMMY type dummy" + ] + }, + { + "id": "671a", + "name": "Add ETS qdisc with more bands than strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1 quanta 1000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta 1000 [1-9][0-9]* priomap", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2a23", + "name": "Add ETS qdisc with 16 bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 16", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 16", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "8daf", + "name": "Add ETS qdisc with 17 bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7f95", + "name": "Add ETS qdisc with 17 strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 17", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "837a", + "name": "Add ETS qdisc with 
16 quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 16", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "65b6", + "name": "Add ETS qdisc with 17 quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "b9e9", + "name": "Add ETS qdisc with 16 strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 8 quanta 1 2 3 4 5 6 7 8", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .* bands 16", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "9877", + "name": "Add ETS qdisc with 17 strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 9 quanta 1 2 3 4 5 6 7 8", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "c696", + "name": "Add ETS qdisc with priomap", + 
"category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "30c4", + "name": "Add ETS qdisc with quanta + priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e8ac", + "name": "Add ETS qdisc with strict + priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*bands 5 strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "5a7e", + "name": "Add ETS qdisc with quanta + strict + priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 quanta 1000 2000 3000 priomap 
0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "cb8b", + "name": "Show ETS class :1", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:1", + "matchPattern": "class ets 1:1 root quantum 4000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "1b4e", + "name": "Show ETS class :2", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:2", + "matchPattern": "class ets 1:2 root quantum 3000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "f642", + "name": "Show ETS class :3", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:3", + "matchPattern": "class ets 1:3 root quantum 2000", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "0a5f", + "name": "Show ETS strict class", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev 
$DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3", + "expExitCode": "0", + "verifyCmd": "$TC class show dev $DUMMY classid 1:1", + "matchPattern": "class ets 1:1 root $", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "f7c8", + "name": "Add ETS qdisc with too many quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000 2000 3000", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "2389", + "name": "Add ETS qdisc with too many strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 3", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "fe3c", + "name": "Add ETS qdisc with too many strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 2 quanta 1000 2000 3000", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "cb04", + "name": "Add ETS qdisc with excess priomap elements", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 
1 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "c32e", + "name": "Add ETS qdisc with priomap above bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 priomap 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "744c", + "name": "Add ETS qdisc with priomap above quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 500 priomap 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7b33", + "name": "Add ETS qdisc with priomap above strict", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 priomap 0 1 2", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "dbe6", + "name": "Add ETS qdisc with priomap above strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 1 quanta 1000 500 priomap 0 1 2 3", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + 
"teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "bdb2", + "name": "Add ETS qdisc with priomap within bands with strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "39a3", + "name": "Add ETS qdisc with priomap above bands with strict + quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3 4", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "557c", + "name": "Unset priorities default to the last band", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 priomap 0 0 0 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*priomap 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "a347", + "name": "Unset priorities default to the last band -- no priomap", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3", + "matchCount": "1", + 
"teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "39c4", + "name": "Add ETS qdisc with too few bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 0", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "930b", + "name": "Add ETS qdisc with too many bands", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "406a", + "name": "Add ETS qdisc without parameters", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e51a", + "name": "Zero element in quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 0 800 700", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "e7f2", + "name": "Sole zero element in quanta", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY 
handle 1: root ets quanta 0", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "d6e6", + "name": "No values after the quanta keyword", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta", + "expExitCode": "255", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "28c6", + "name": "Change ETS band quantum", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets quantum 1500", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*quanta 1500 2000 3000 priomap ", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "4714", + "name": "Change ETS band without quantum", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 priomap ", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "6979", + "name": "Change quantum of a strict ETS band", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets strict 5" + ], + "cmdUnderTest": "$TC class change dev 
$DUMMY classid 1:2 ets quantum 1500", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*bands 5 .*strict 5", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "9a7d", + "name": "Change ETS strict band without quantum", + "category": [ + "qdisc", + "ets" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root ets strict 5" + ], + "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc ets .*bands 5 .*strict 5", + "matchCount": "1", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json new file mode 100644 index 000000000000..5ecd93b4c473 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json @@ -0,0 +1,304 @@ +[ + { + "id": "a519", + "name": "Add bfifo qdisc with system default parameters on egress", + "__comment": "When omitted, queue size in bfifo is calculated as: txqueuelen * (MTU + LinkLayerHdrSize), where LinkLayerHdrSize=14 for Ethernet", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root.*limit [0-9]+b", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root bfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "585c", + "name": "Add pfifo qdisc with system default parameters on egress", + "__comment": "When omitted, queue size in pfifo is defaulted to the interface's txqueuelen value.", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type 
dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc pfifo 1: root.*limit [0-9]+p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root pfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "a86e", + "name": "Add bfifo qdisc with system default parameters on egress with handle of maximum value", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle ffff: bfifo", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo ffff: root.*limit [0-9]+b", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle ffff: root bfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "9ac8", + "name": "Add bfifo qdisc on egress with queue size of 3000 bytes", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo limit 3000b", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root.*limit 3000b", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root bfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "f4e6", + "name": "Add pfifo qdisc on egress with queue size of 3000 packets", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY txqueuelen 3000 type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo limit 3000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc pfifo 1: root.*limit 3000p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root pfifo", + "$IP link del dev $DUMMY type dummy" + ] + 
}, + { + "id": "b1b1", + "name": "Add bfifo qdisc with system default parameters on egress with invalid handle exceeding maximum value", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 10000: bfifo", + "expExitCode": "255", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 10000: root.*limit [0-9]+b", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "8d5e", + "name": "Add bfifo qdisc on egress with unsupported argument", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo foorbar", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7787", + "name": "Add pfifo qdisc on egress with unsupported argument", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo foorbar", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc pfifo 1: root", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "c4b6", + "name": "Replace bfifo qdisc on egress with new queue size", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link del dev $DUMMY type dummy || /bin/true", + "$IP link add dev $DUMMY txqueuelen 1000 type dummy", + "$TC qdisc add dev $DUMMY handle 1: root bfifo" + ], + "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root bfifo limit 3000b", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root.*limit 3000b", + "matchCount": "1", + "teardown": [ 
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "3df6", + "name": "Replace pfifo qdisc on egress with new queue size", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link del dev $DUMMY type dummy || /bin/true", + "$IP link add dev $DUMMY txqueuelen 1000 type dummy", + "$TC qdisc add dev $DUMMY handle 1: root pfifo" + ], + "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root pfifo limit 30", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc pfifo 1: root.*limit 30p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root pfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "7a67", + "name": "Add bfifo qdisc on egress with queue size in invalid format", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo limit foo-bar", + "expExitCode": "1", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root.*limit foo-bar", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "1298", + "name": "Add duplicate bfifo qdisc on egress", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root bfifo" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root bfifo", + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "45a0", + "name": "Delete nonexistent bfifo qdisc", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY root handle 
1: bfifo", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "972b", + "name": "Add prio qdisc on egress with invalid format for handles", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 123^ bfifo limit 100b", + "expExitCode": "255", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 123 root", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "4d39", + "name": "Delete bfifo qdisc twice", + "category": [ + "qdisc", + "fifo" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: bfifo", + "$TC qdisc del dev $DUMMY root handle 1: bfifo" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root bfifo", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc bfifo 1: root", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json index f518c55f468b..d99dba6e2b1a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json @@ -7,16 +7,16 @@ "ingress" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress", + "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress", "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc ingress ffff:", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DEV1 
ingress", - "$IP link del dev $DEV1 type dummy" + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" ] }, { @@ -27,15 +27,15 @@ "ingress" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress foorbar", + "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress foorbar", "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc ingress ffff:", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -46,17 +46,17 @@ "ingress" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true", - "$TC qdisc add dev $DEV1 ingress" + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 ingress", + "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc ingress ffff:", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DEV1 ingress", - "$IP link del dev $DEV1 type dummy" + "$TC qdisc del dev $DUMMY ingress", + "$IP link del dev $DUMMY type dummy" ] }, { @@ -67,15 +67,15 @@ "ingress" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress", + "cmdUnderTest": "$TC qdisc del dev $DUMMY ingress", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc ingress ffff:", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -86,17 +86,17 @@ "ingress" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true", - "$TC qdisc add dev $DEV1 ingress", - "$TC 
qdisc del dev $DEV1 ingress" + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY ingress", + "$TC qdisc del dev $DUMMY ingress" ], - "cmdUnderTest": "$TC qdisc del dev $DEV1 ingress", + "cmdUnderTest": "$TC qdisc del dev $DUMMY ingress", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc ingress ffff:", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json index 9c792fa8ca23..3076c02d08d6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json @@ -7,16 +7,16 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio", "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DEV1 handle 1: root prio", - "$IP link del dev $DEV1 type dummy" + "$TC qdisc del dev $DUMMY handle 1: root prio", + "$IP link del dev $DUMMY type dummy" ] }, { @@ -27,15 +27,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle ffff: prio", + "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle ffff: prio", "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio ffff: root", "matchCount": "1", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev 
$DUMMY type dummy" ] }, { @@ -46,15 +46,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 10000: prio", + "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 10000: prio", "expExitCode": "255", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 10000: root", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -65,15 +65,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio foorbar", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio foorbar", "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -84,16 +84,16 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0", "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DEV1 handle 1: root prio", - "$IP link del dev $DEV1 type dummy" + "$TC qdisc del dev $DUMMY handle 1: root prio", + "$IP link del dev $DUMMY type dummy" ] }, { @@ -104,15 +104,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type 
dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1", "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -123,15 +123,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0", "expExitCode": "1", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -142,15 +142,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root.*bands 1 priomap.*0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, 
{ @@ -161,15 +161,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root.*bands 1024 priomap.*1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -180,17 +180,17 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true", - "$TC qdisc add dev $DEV1 handle 1: root prio" + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root prio" ], - "cmdUnderTest": "$TC qdisc replace dev $DEV1 handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0", + "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0", "expExitCode": "0", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root.*bands 8 priomap.*1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DEV1 handle 1: root prio", - "$IP link del dev $DEV1 type dummy" + "$TC qdisc del dev $DUMMY handle 1: root prio", + "$IP link del dev $DUMMY type dummy" ] }, { @@ -201,17 +201,17 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true", - "$TC qdisc add dev $DEV1 handle 1: root prio" + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root prio" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 handle 1: root prio", + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio", 
"expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DEV1 handle 1: root prio", - "$IP link del dev $DEV1 type dummy" + "$TC qdisc del dev $DUMMY handle 1: root prio", + "$IP link del dev $DUMMY type dummy" ] }, { @@ -222,15 +222,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc del dev $DEV1 root handle 1: prio", + "cmdUnderTest": "$TC qdisc del dev $DUMMY root handle 1: prio", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 1: root", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -241,15 +241,15 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true" + "$IP link add dev $DUMMY type dummy || /bin/true" ], - "cmdUnderTest": "$TC qdisc add dev $DEV1 root handle 123^ prio", + "cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 123^ prio", "expExitCode": "255", - "verifyCmd": "$TC qdisc show dev $DEV1", + "verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc prio 123 root", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] }, { @@ -260,17 +260,17 @@ "prio" ], "setup": [ - "$IP link add dev $DEV1 type dummy || /bin/true", - "$TC qdisc add dev $DEV1 root handle 1: prio", - "$TC qdisc del dev $DEV1 root handle 1: prio" + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: prio", + "$TC qdisc del dev $DUMMY root handle 1: prio" ], - "cmdUnderTest": "$TC qdisc del dev $DEV1 handle 1: root prio", + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root prio", "expExitCode": "2", - "verifyCmd": "$TC qdisc show dev $DEV1", + 
"verifyCmd": "$TC qdisc show dev $DUMMY", "matchPattern": "qdisc ingress ffff:", "matchCount": "0", "teardown": [ - "$IP link del dev $DEV1 type dummy" + "$IP link del dev $DUMMY type dummy" ] } ] diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index f04321ace9fb..e566c70e64a1 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -356,12 +356,14 @@ def test_runner(pm, args, filtered_tests): time.sleep(2) for tidx in testlist: if "flower" in tidx["category"] and args.device == None: + errmsg = "Tests using the DEV2 variable must define the name of a " + errmsg += "physical NIC with the -d option when running tdc.\n" + errmsg += "Test has been skipped." if args.verbose > 1: - print('Not executing test {} {} because DEV2 not defined'. - format(tidx['id'], tidx['name'])) + print(errmsg) res = TestResult(tidx['id'], tidx['name']) res.set_result(ResultState.skip) - res.set_errormsg('Not executed because DEV2 is not defined') + res.set_errormsg(errmsg) tsr.add_resultdata(res) continue try: @@ -499,7 +501,9 @@ def set_args(parser): choices=['none', 'xunit', 'tap'], help='Specify the format for test results. (Default: TAP)') parser.add_argument('-d', '--device', - help='Execute the test case in flower category') + help='Execute test cases that use a physical device, ' + + 'where DEVICE is its name. 
(If not defined, tests ' + + 'that require a physical device will be skipped)') parser.add_argument( '-P', '--pause', action='store_true', help='Pause execution just before post-suite stage') diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index b771d4c89621..080709cc4297 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -16,6 +16,7 @@ NAMES = { 'DEV0': 'v0p0', 'DEV1': 'v0p1', 'DEV2': '', + 'DUMMY': 'dummy1', 'BATCH_FILE': './batch.txt', 'BATCH_DIR': 'tmp', # Length of time in seconds to wait before terminating a command diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore new file mode 100644 index 000000000000..789f21e81028 --- /dev/null +++ b/tools/testing/selftests/timens/.gitignore @@ -0,0 +1,8 @@ +clock_nanosleep +exec +gettime_perf +gettime_perf_cold +procfs +timens +timer +timerfd diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile new file mode 100644 index 000000000000..e9fb30bd8aeb --- /dev/null +++ b/tools/testing/selftests/timens/Makefile @@ -0,0 +1,7 @@ +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec +TEST_GEN_PROGS_EXTENDED := gettime_perf + +CFLAGS := -Wall -Werror -pthread +LDFLAGS := -lrt -ldl + +include ../lib.mk diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c new file mode 100644 index 000000000000..8e7b7c72ef65 --- /dev/null +++ b/tools/testing/selftests/timens/clock_nanosleep.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> + +#include <sys/timerfd.h> +#include <sys/syscall.h> +#include <time.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <pthread.h> +#include <signal.h> +#include <string.h> + +#include "log.h" +#include "timens.h" + +void 
test_sig(int sig) +{ + if (sig == SIGUSR2) + pthread_exit(NULL); +} + +struct thread_args { + struct timespec *now, *rem; + pthread_mutex_t *lock; + int clockid; + int abs; +}; + +void *call_nanosleep(void *_args) +{ + struct thread_args *args = _args; + + clock_nanosleep(args->clockid, args->abs ? TIMER_ABSTIME : 0, args->now, args->rem); + pthread_mutex_unlock(args->lock); + return NULL; +} + +int run_test(int clockid, int abs) +{ + struct timespec now = {}, rem; + struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid}; + struct timespec start; + pthread_mutex_t lock; + pthread_t thread; + int j, ok, ret; + + signal(SIGUSR1, test_sig); + signal(SIGUSR2, test_sig); + + pthread_mutex_init(&lock, NULL); + pthread_mutex_lock(&lock); + + if (clock_gettime(clockid, &start) == -1) { + if (errno == EINVAL && check_skip(clockid)) + return 0; + return pr_perror("clock_gettime"); + } + + + if (abs) { + now.tv_sec = start.tv_sec; + now.tv_nsec = start.tv_nsec; + } + + now.tv_sec += 3600; + args.abs = abs; + args.lock = &lock; + ret = pthread_create(&thread, NULL, call_nanosleep, &args); + if (ret != 0) { + pr_err("Unable to create a thread: %s", strerror(ret)); + return 1; + } + + /* Wait when the thread will call clock_nanosleep(). */ + ok = 0; + for (j = 0; j < 8; j++) { + /* The maximum timeout is about 5 seconds. */ + usleep(10000 << j); + + /* Try to interrupt clock_nanosleep(). */ + pthread_kill(thread, SIGUSR1); + + usleep(10000 << j); + /* Check whether clock_nanosleep() has been interrupted or not. 
*/ + if (pthread_mutex_trylock(&lock) == 0) { + /**/ + ok = 1; + break; + } + } + if (!ok) + pthread_kill(thread, SIGUSR2); + pthread_join(thread, NULL); + pthread_mutex_destroy(&lock); + + if (!ok) { + ksft_test_result_pass("clockid: %d abs:%d timeout\n", clockid, abs); + return 1; + } + + if (rem.tv_sec < 3300 || rem.tv_sec > 3900) { + pr_fail("clockid: %d abs: %d remain: %ld\n", + clockid, abs, rem.tv_sec); + return 1; + } + ksft_test_result_pass("clockid: %d abs:%d\n", clockid, abs); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret, nsfd; + + nscheck(); + + ksft_set_plan(4); + + check_config_posix_timers(); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, 7 * 24 * 3600)) + return 1; + if (_settime(CLOCK_BOOTTIME, 9 * 24 * 3600)) + return 1; + + nsfd = open("/proc/self/ns/time_for_children", O_RDONLY); + if (nsfd < 0) + return pr_perror("Unable to open timens_for_children"); + + if (setns(nsfd, CLONE_NEWTIME)) + return pr_perror("Unable to set timens"); + + ret = 0; + ret |= run_test(CLOCK_MONOTONIC, 0); + ret |= run_test(CLOCK_MONOTONIC, 1); + ret |= run_test(CLOCK_BOOTTIME_ALARM, 0); + ret |= run_test(CLOCK_BOOTTIME_ALARM, 1); + + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return ret; +} diff --git a/tools/testing/selftests/timens/config b/tools/testing/selftests/timens/config new file mode 100644 index 000000000000..4480620f6f49 --- /dev/null +++ b/tools/testing/selftests/timens/config @@ -0,0 +1 @@ +CONFIG_TIME_NS=y diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c new file mode 100644 index 000000000000..87b47b557a7a --- /dev/null +++ b/tools/testing/selftests/timens/exec.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include 
<unistd.h> +#include <time.h> +#include <string.h> + +#include "log.h" +#include "timens.h" + +#define OFFSET (36000) + +int main(int argc, char *argv[]) +{ + struct timespec now, tst; + int status, i; + pid_t pid; + + if (argc > 1) { + if (sscanf(argv[1], "%ld", &now.tv_sec) != 1) + return pr_perror("sscanf"); + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now.tv_sec) > 5) + return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); + } + return 0; + } + + nscheck(); + + ksft_set_plan(1); + + clock_gettime(CLOCK_MONOTONIC, &now); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, OFFSET)) + return 1; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now.tv_sec) > 5) + return pr_fail("%ld %ld\n", + now.tv_sec, tst.tv_sec); + } + + if (argc > 1) + return 0; + + pid = fork(); + if (pid < 0) + return pr_perror("fork"); + + if (pid == 0) { + char now_str[64]; + char *cargv[] = {"exec", now_str, NULL}; + char *cenv[] = {NULL}; + + /* Check that a child process is in the new timens. */ + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5) + return pr_fail("%ld %ld\n", + now.tv_sec + OFFSET, tst.tv_sec); + } + + /* Check for proper vvar offsets after execve. 
*/ + snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET); + execve("/proc/self/exe", cargv, cenv); + return pr_perror("execve"); + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("waitpid"); + + if (status) + ksft_exit_fail(); + + ksft_test_result_pass("exec\n"); + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c new file mode 100644 index 000000000000..7bf841a3967b --- /dev/null +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <time.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <dlfcn.h> + +#include "log.h" +#include "timens.h" + +typedef int (*vgettime_t)(clockid_t, struct timespec *); + +vgettime_t vdso_clock_gettime; + +static void fill_function_pointers(void) +{ + void *vdso = dlopen("linux-vdso.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-gate.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) { + pr_err("[WARN]\tfailed to find vDSO\n"); + return; + } + + vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); + if (!vdso_clock_gettime) + pr_err("Warning: failed to find clock_gettime in vDSO\n"); + +} + +static void test(clock_t clockid, char *clockstr, bool in_ns) +{ + struct timespec tp, start; + long i = 0; + const int timeout = 3; + + vdso_clock_gettime(clockid, &start); + tp = start; + for (tp = start; start.tv_sec + timeout > tp.tv_sec || + (start.tv_sec + timeout == tp.tv_sec && + start.tv_nsec > tp.tv_nsec); i++) { + vdso_clock_gettime(clockid, &tp); + } + + ksft_test_result_pass("%s:\tclock: %10s\tcycles:\t%10ld\n", + in_ns ? 
"ns" : "host", clockstr, i); +} + +int main(int argc, char *argv[]) +{ + time_t offset = 10; + int nsfd; + + ksft_set_plan(8); + + fill_function_pointers(); + + test(CLOCK_MONOTONIC, "monotonic", false); + test(CLOCK_MONOTONIC_COARSE, "monotonic-coarse", false); + test(CLOCK_MONOTONIC_RAW, "monotonic-raw", false); + test(CLOCK_BOOTTIME, "boottime", false); + + nscheck(); + + if (unshare_timens()) + return 1; + + nsfd = open("/proc/self/ns/time_for_children", O_RDONLY); + if (nsfd < 0) + return pr_perror("Can't open a time namespace"); + + if (_settime(CLOCK_MONOTONIC, offset)) + return 1; + if (_settime(CLOCK_BOOTTIME, offset)) + return 1; + + if (setns(nsfd, CLONE_NEWTIME)) + return pr_perror("setns"); + + test(CLOCK_MONOTONIC, "monotonic", true); + test(CLOCK_MONOTONIC_COARSE, "monotonic-coarse", true); + test(CLOCK_MONOTONIC_RAW, "monotonic-raw", true); + test(CLOCK_BOOTTIME, "boottime", true); + + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/timens/log.h b/tools/testing/selftests/timens/log.h new file mode 100644 index 000000000000..db64df2a8483 --- /dev/null +++ b/tools/testing/selftests/timens/log.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SELFTEST_TIMENS_LOG_H__ +#define __SELFTEST_TIMENS_LOG_H__ + +#define pr_msg(fmt, lvl, ...) \ + ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \ + lvl, __FILE__, __LINE__, ##__VA_ARGS__) + +#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__) + +#define pr_err(fmt, ...) \ + ({ \ + ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \ + -1; \ + }) + +#define pr_fail(fmt, ...) \ + ({ \ + ksft_test_result_fail(fmt, ##__VA_ARGS__); \ + -1; \ + }) + +#define pr_perror(fmt, ...) 
pr_p(pr_err, fmt, ##__VA_ARGS__) + +#endif diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c new file mode 100644 index 000000000000..43d93f4006b9 --- /dev/null +++ b/tools/testing/selftests/timens/procfs.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <math.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <time.h> + +#include "log.h" +#include "timens.h" + +/* + * Test shouldn't be run for a day, so add 10 days to child + * time and check parent's time to be in the same day. + */ +#define MAX_TEST_TIME_SEC (60*5) +#define DAY_IN_SEC (60*60*24) +#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +static int child_ns, parent_ns; + +static int switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWTIME)) + return pr_perror("setns()"); + + return 0; +} + +static int init_namespaces(void) +{ + char path[] = "/proc/self/ns/time_for_children"; + struct stat st1, st2; + + parent_ns = open(path, O_RDONLY); + if (parent_ns <= 0) + return pr_perror("Unable to open %s", path); + + if (fstat(parent_ns, &st1)) + return pr_perror("Unable to stat the parent timens"); + + if (unshare_timens()) + return -1; + + child_ns = open(path, O_RDONLY); + if (child_ns <= 0) + return pr_perror("Unable to open %s", path); + + if (fstat(child_ns, &st2)) + return pr_perror("Unable to stat the timens"); + + if (st1.st_ino == st2.st_ino) + return pr_err("The same child_ns after CLONE_NEWTIME"); + + if (_settime(CLOCK_BOOTTIME, TEN_DAYS_IN_SEC)) + return -1; + + return 0; +} + +static int read_proc_uptime(struct timespec *uptime) +{ + unsigned long up_sec, up_nsec; + FILE *proc; + + proc = fopen("/proc/uptime", "r"); + if (proc == NULL) { + pr_perror("Unable to open 
/proc/uptime"); + return -1; + } + + if (fscanf(proc, "%lu.%02lu", &up_sec, &up_nsec) != 2) { + if (errno) { + pr_perror("fscanf"); + return -errno; + } + pr_err("failed to parse /proc/uptime"); + return -1; + } + fclose(proc); + + uptime->tv_sec = up_sec; + uptime->tv_nsec = up_nsec; + return 0; +} + +static int check_uptime(void) +{ + struct timespec uptime_new, uptime_old; + time_t uptime_expected; + double prec = MAX_TEST_TIME_SEC; + + if (switch_ns(parent_ns)) + return pr_err("switch_ns(%d)", parent_ns); + + if (read_proc_uptime(&uptime_old)) + return 1; + + if (switch_ns(child_ns)) + return pr_err("switch_ns(%d)", child_ns); + + if (read_proc_uptime(&uptime_new)) + return 1; + + uptime_expected = uptime_old.tv_sec + TEN_DAYS_IN_SEC; + if (fabs(difftime(uptime_new.tv_sec, uptime_expected)) > prec) { + pr_fail("uptime in /proc/uptime: old %ld, new %ld [%ld]", + uptime_old.tv_sec, uptime_new.tv_sec, + uptime_old.tv_sec + TEN_DAYS_IN_SEC); + return 1; + } + + ksft_test_result_pass("Passed for /proc/uptime\n"); + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + + nscheck(); + + ksft_set_plan(1); + + if (init_namespaces()) + return 1; + + ret |= check_uptime(); + + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return ret; +} diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c new file mode 100644 index 000000000000..559d26e21ba0 --- /dev/null +++ b/tools/testing/selftests/timens/timens.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <time.h> +#include <string.h> + +#include "log.h" +#include "timens.h" + +/* + * Test shouldn't be run for a day, so add 10 days to child + * time and check parent's time to be in the same day. 
+ */ +#define DAY_IN_SEC (60*60*24) +#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct test_clock { + clockid_t id; + char *name; + /* + * off_id is -1 if a clock has own offset, or it contains an index + * which contains a right offset of this clock. + */ + int off_id; + time_t offset; +}; + +#define ct(clock, off_id) { clock, #clock, off_id } +static struct test_clock clocks[] = { + ct(CLOCK_BOOTTIME, -1), + ct(CLOCK_BOOTTIME_ALARM, 1), + ct(CLOCK_MONOTONIC, -1), + ct(CLOCK_MONOTONIC_COARSE, 1), + ct(CLOCK_MONOTONIC_RAW, 1), +}; +#undef ct + +static int child_ns, parent_ns = -1; + +static int switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWTIME)) { + pr_perror("setns()"); + return -1; + } + + return 0; +} + +static int init_namespaces(void) +{ + char path[] = "/proc/self/ns/time_for_children"; + struct stat st1, st2; + + if (parent_ns == -1) { + parent_ns = open(path, O_RDONLY); + if (parent_ns <= 0) + return pr_perror("Unable to open %s", path); + } + + if (fstat(parent_ns, &st1)) + return pr_perror("Unable to stat the parent timens"); + + if (unshare_timens()) + return -1; + + child_ns = open(path, O_RDONLY); + if (child_ns <= 0) + return pr_perror("Unable to open %s", path); + + if (fstat(child_ns, &st2)) + return pr_perror("Unable to stat the timens"); + + if (st1.st_ino == st2.st_ino) + return pr_perror("The same child_ns after CLONE_NEWTIME"); + + return 0; +} + +static int test_gettime(clockid_t clock_index, bool raw_syscall, time_t offset) +{ + struct timespec child_ts_new, parent_ts_old, cur_ts; + char *entry = raw_syscall ? 
"syscall" : "vdso"; + double precision = 0.0; + + if (check_skip(clocks[clock_index].id)) + return 0; + + switch (clocks[clock_index].id) { + case CLOCK_MONOTONIC_COARSE: + case CLOCK_MONOTONIC_RAW: + precision = -2.0; + break; + } + + if (switch_ns(parent_ns)) + return pr_err("switch_ns(%d)", child_ns); + + if (_gettime(clocks[clock_index].id, &parent_ts_old, raw_syscall)) + return -1; + + child_ts_new.tv_nsec = parent_ts_old.tv_nsec; + child_ts_new.tv_sec = parent_ts_old.tv_sec + offset; + + if (switch_ns(child_ns)) + return pr_err("switch_ns(%d)", child_ns); + + if (_gettime(clocks[clock_index].id, &cur_ts, raw_syscall)) + return -1; + + if (difftime(cur_ts.tv_sec, child_ts_new.tv_sec) < precision) { + ksft_test_result_fail( + "Child's %s (%s) time has not changed: %lu -> %lu [%lu]\n", + clocks[clock_index].name, entry, parent_ts_old.tv_sec, + child_ts_new.tv_sec, cur_ts.tv_sec); + return -1; + } + + if (switch_ns(parent_ns)) + return pr_err("switch_ns(%d)", parent_ns); + + if (_gettime(clocks[clock_index].id, &cur_ts, raw_syscall)) + return -1; + + if (difftime(cur_ts.tv_sec, parent_ts_old.tv_sec) > DAY_IN_SEC) { + ksft_test_result_fail( + "Parent's %s (%s) time has changed: %lu -> %lu [%lu]\n", + clocks[clock_index].name, entry, parent_ts_old.tv_sec, + child_ts_new.tv_sec, cur_ts.tv_sec); + /* Let's play nice and put it closer to original */ + clock_settime(clocks[clock_index].id, &cur_ts); + return -1; + } + + ksft_test_result_pass("Passed for %s (%s)\n", + clocks[clock_index].name, entry); + return 0; +} + +int main(int argc, char *argv[]) +{ + unsigned int i; + time_t offset; + int ret = 0; + + nscheck(); + + check_config_posix_timers(); + + ksft_set_plan(ARRAY_SIZE(clocks) * 2); + + if (init_namespaces()) + return 1; + + /* Offsets have to be set before tasks enter the namespace. 
*/ + for (i = 0; i < ARRAY_SIZE(clocks); i++) { + if (clocks[i].off_id != -1) + continue; + offset = TEN_DAYS_IN_SEC + i * 1000; + clocks[i].offset = offset; + if (_settime(clocks[i].id, offset)) + return 1; + } + + for (i = 0; i < ARRAY_SIZE(clocks); i++) { + if (clocks[i].off_id != -1) + offset = clocks[clocks[i].off_id].offset; + else + offset = clocks[i].offset; + ret |= test_gettime(i, true, offset); + ret |= test_gettime(i, false, offset); + } + + if (ret) + ksft_exit_fail(); + + ksft_exit_pass(); + return !!ret; +} diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h new file mode 100644 index 000000000000..e09e7e39bc52 --- /dev/null +++ b/tools/testing/selftests/timens/timens.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TIMENS_H__ +#define __TIMENS_H__ + +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> + +#include "../kselftest.h" + +#ifndef CLONE_NEWTIME +# define CLONE_NEWTIME 0x00000080 +#endif + +static int config_posix_timers = true; + +static inline void check_config_posix_timers(void) +{ + if (timer_create(-1, 0, 0) == -1 && errno == ENOSYS) + config_posix_timers = false; +} + +static inline bool check_skip(int clockid) +{ + if (config_posix_timers) + return false; + + switch (clockid) { + /* Only these clocks are supported without CONFIG_POSIX_TIMERS. 
*/ + case CLOCK_BOOTTIME: + case CLOCK_MONOTONIC: + case CLOCK_REALTIME: + return false; + default: + ksft_test_result_skip("Posix Clocks & timers are not supported\n"); + return true; + } + + return false; +} + +static inline int unshare_timens(void) +{ + if (unshare(CLONE_NEWTIME)) { + if (errno == EPERM) + ksft_exit_skip("need to run as root\n"); + return pr_perror("Can't unshare() timens"); + } + return 0; +} + +static inline int _settime(clockid_t clk_id, time_t offset) +{ + int fd, len; + char buf[4096]; + + if (clk_id == CLOCK_MONOTONIC_COARSE || clk_id == CLOCK_MONOTONIC_RAW) + clk_id = CLOCK_MONOTONIC; + + len = snprintf(buf, sizeof(buf), "%d %ld 0", clk_id, offset); + + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + return pr_perror("/proc/self/timens_offsets"); + + if (write(fd, buf, len) != len) + return pr_perror("/proc/self/timens_offsets"); + + close(fd); + + return 0; +} + +static inline int _gettime(clockid_t clk_id, struct timespec *res, bool raw_syscall) +{ + int err; + + if (!raw_syscall) { + if (clock_gettime(clk_id, res)) { + pr_perror("clock_gettime(%d)", (int)clk_id); + return -1; + } + return 0; + } + + err = syscall(SYS_clock_gettime, clk_id, res); + if (err) + pr_perror("syscall(SYS_clock_gettime(%d))", (int)clk_id); + + return err; +} + +static inline void nscheck(void) +{ + if (access("/proc/self/ns/time", F_OK) < 0) + ksft_exit_skip("Time namespaces are not supported\n"); +} + +#endif diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c new file mode 100644 index 000000000000..0cca7aafc4bd --- /dev/null +++ b/tools/testing/selftests/timens/timer.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> + +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <signal.h> +#include <time.h> + +#include "log.h" 
+#include "timens.h" + +int run_test(int clockid, struct timespec now) +{ + struct itimerspec new_value; + long long elapsed; + timer_t fd; + int i; + + for (i = 0; i < 2; i++) { + struct sigevent sevp = {.sigev_notify = SIGEV_NONE}; + int flags = 0; + + new_value.it_value.tv_sec = 3600; + new_value.it_value.tv_nsec = 0; + new_value.it_interval.tv_sec = 1; + new_value.it_interval.tv_nsec = 0; + + if (i == 1) { + new_value.it_value.tv_sec += now.tv_sec; + new_value.it_value.tv_nsec += now.tv_nsec; + } + + if (timer_create(clockid, &sevp, &fd) == -1) { + if (errno == ENOSYS) { + ksft_test_result_skip("Posix Clocks & timers are supported\n"); + return 0; + } + return pr_perror("timerfd_create"); + } + + if (i == 1) + flags |= TIMER_ABSTIME; + if (timer_settime(fd, flags, &new_value, NULL) == -1) + return pr_perror("timerfd_settime"); + + if (timer_gettime(fd, &new_value) == -1) + return pr_perror("timerfd_gettime"); + + elapsed = new_value.it_value.tv_sec; + if (abs(elapsed - 3600) > 60) { + ksft_test_result_fail("clockid: %d elapsed: %lld\n", + clockid, elapsed); + return 1; + } + } + + ksft_test_result_pass("clockid=%d\n", clockid); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret, status, len, fd; + char buf[4096]; + pid_t pid; + struct timespec btime_now, mtime_now; + + nscheck(); + + ksft_set_plan(3); + + clock_gettime(CLOCK_MONOTONIC, &mtime_now); + clock_gettime(CLOCK_BOOTTIME, &btime_now); + + if (unshare_timens()) + return 1; + + len = snprintf(buf, sizeof(buf), "%d %d 0\n%d %d 0", + CLOCK_MONOTONIC, 70 * 24 * 3600, + CLOCK_BOOTTIME, 9 * 24 * 3600); + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + return pr_perror("/proc/self/timens_offsets"); + + if (write(fd, buf, len) != len) + return pr_perror("/proc/self/timens_offsets"); + + close(fd); + mtime_now.tv_sec += 70 * 24 * 3600; + btime_now.tv_sec += 9 * 24 * 3600; + + pid = fork(); + if (pid < 0) + return pr_perror("Unable to fork"); + if (pid == 0) { + ret = 0; + ret |= 
run_test(CLOCK_BOOTTIME, btime_now); + ret |= run_test(CLOCK_MONOTONIC, mtime_now); + ret |= run_test(CLOCK_BOOTTIME_ALARM, btime_now); + + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return ret; + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("Unable to wait the child process"); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c new file mode 100644 index 000000000000..eff1ec5ff215 --- /dev/null +++ b/tools/testing/selftests/timens/timerfd.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> + +#include <sys/timerfd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> + +#include "log.h" +#include "timens.h" + +static int tclock_gettime(clock_t clockid, struct timespec *now) +{ + if (clockid == CLOCK_BOOTTIME_ALARM) + clockid = CLOCK_BOOTTIME; + return clock_gettime(clockid, now); +} + +int run_test(int clockid, struct timespec now) +{ + struct itimerspec new_value; + long long elapsed; + int fd, i; + + if (tclock_gettime(clockid, &now)) + return pr_perror("clock_gettime(%d)", clockid); + + for (i = 0; i < 2; i++) { + int flags = 0; + + new_value.it_value.tv_sec = 3600; + new_value.it_value.tv_nsec = 0; + new_value.it_interval.tv_sec = 1; + new_value.it_interval.tv_nsec = 0; + + if (i == 1) { + new_value.it_value.tv_sec += now.tv_sec; + new_value.it_value.tv_nsec += now.tv_nsec; + } + + fd = timerfd_create(clockid, 0); + if (fd == -1) + return pr_perror("timerfd_create(%d)", clockid); + + if (i == 1) + flags |= TFD_TIMER_ABSTIME; + + if (timerfd_settime(fd, flags, &new_value, NULL)) + return pr_perror("timerfd_settime(%d)", clockid); + + if (timerfd_gettime(fd, &new_value)) + return pr_perror("timerfd_gettime(%d)", clockid); + + elapsed = 
new_value.it_value.tv_sec; + if (abs(elapsed - 3600) > 60) { + ksft_test_result_fail("clockid: %d elapsed: %lld\n", + clockid, elapsed); + return 1; + } + + close(fd); + } + + ksft_test_result_pass("clockid=%d\n", clockid); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int ret, status, len, fd; + char buf[4096]; + pid_t pid; + struct timespec btime_now, mtime_now; + + nscheck(); + + ksft_set_plan(3); + + clock_gettime(CLOCK_MONOTONIC, &mtime_now); + clock_gettime(CLOCK_BOOTTIME, &btime_now); + + if (unshare_timens()) + return 1; + + len = snprintf(buf, sizeof(buf), "%d %d 0\n%d %d 0", + CLOCK_MONOTONIC, 70 * 24 * 3600, + CLOCK_BOOTTIME, 9 * 24 * 3600); + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + return pr_perror("/proc/self/timens_offsets"); + + if (write(fd, buf, len) != len) + return pr_perror("/proc/self/timens_offsets"); + + close(fd); + mtime_now.tv_sec += 70 * 24 * 3600; + btime_now.tv_sec += 9 * 24 * 3600; + + pid = fork(); + if (pid < 0) + return pr_perror("Unable to fork"); + if (pid == 0) { + ret = 0; + ret |= run_test(CLOCK_BOOTTIME, btime_now); + ret |= run_test(CLOCK_MONOTONIC, mtime_now); + ret |= run_test(CLOCK_BOOTTIME_ALARM, btime_now); + + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return ret; + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("Unable to wait the child process"); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile index 9dd848427a7b..1a5db1eb8ed5 100644 --- a/tools/testing/selftests/tpm2/Makefile +++ b/tools/testing/selftests/tpm2/Makefile @@ -2,3 +2,4 @@ include ../lib.mk TEST_PROGS := test_smoke.sh test_space.sh +TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 80521d46220c..8155c2ea7ccb 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ 
b/tools/testing/selftests/tpm2/test_smoke.sh @@ -2,3 +2,9 @@ # SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) python -m unittest -v tpm2_tests.SmokeTest +python -m unittest -v tpm2_tests.AsyncTest + +CLEAR_CMD=$(which tpm2_clear) +if [ -n $CLEAR_CMD ]; then + tpm2_clear -T device +fi diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py index 828c18584624..d0fcb66a88a6 100644 --- a/tools/testing/selftests/tpm2/tpm2.py +++ b/tools/testing/selftests/tpm2/tpm2.py @@ -6,8 +6,8 @@ import socket import struct import sys import unittest -from fcntl import ioctl - +import fcntl +import select TPM2_ST_NO_SESSIONS = 0x8001 TPM2_ST_SESSIONS = 0x8002 @@ -352,6 +352,7 @@ def hex_dump(d): class Client: FLAG_DEBUG = 0x01 FLAG_SPACE = 0x02 + FLAG_NONBLOCK = 0x04 TPM_IOC_NEW_SPACE = 0xa200 def __init__(self, flags = 0): @@ -362,13 +363,27 @@ class Client: else: self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0) + if (self.flags & Client.FLAG_NONBLOCK): + flags = fcntl.fcntl(self.tpm, fcntl.F_GETFL) + flags |= os.O_NONBLOCK + fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags) + self.tpm_poll = select.poll() + def close(self): self.tpm.close() def send_cmd(self, cmd): self.tpm.write(cmd) + + if (self.flags & Client.FLAG_NONBLOCK): + self.tpm_poll.register(self.tpm, select.POLLIN) + self.tpm_poll.poll(10000) + rsp = self.tpm.read() + if (self.flags & Client.FLAG_NONBLOCK): + self.tpm_poll.unregister(self.tpm) + if (self.flags & Client.FLAG_DEBUG) != 0: sys.stderr.write('cmd' + os.linesep) sys.stderr.write(hex_dump(cmd) + os.linesep) diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py index d4973be53493..728be7c69b76 100644 --- a/tools/testing/selftests/tpm2/tpm2_tests.py +++ b/tools/testing/selftests/tpm2/tpm2_tests.py @@ -288,3 +288,16 @@ class SpaceTest(unittest.TestCase): self.assertEqual(rc, tpm2.TPM2_RC_COMMAND_CODE | tpm2.TSS2_RESMGR_TPM_RC_LAYER) + +class AsyncTest(unittest.TestCase): + def 
setUp(self): + logging.basicConfig(filename='AsyncTest.log', level=logging.DEBUG) + + def test_async(self): + log = logging.getLogger(__name__) + log.debug(sys._getframe().f_code.co_name) + + async_client = tpm2.Client(tpm2.Client.FLAG_NONBLOCK) + log.debug("Calling get_cap in a NON_BLOCKING mode") + async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION) + async_client.close() diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 9534dc2bc929..7f9a8a8c31da 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for vm selftests +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt @@ -16,8 +18,11 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd + +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range +endif TEST_PROGS := run_vmtests diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 1c0d76cb5adf..93b90a9b1eeb 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -1,2 +1,3 @@ CONFIG_SYSVIPC=y CONFIG_USERFAULTFD=y +CONFIG_TEST_VMALLOC=m diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index c0534e298b51..389327e9b30a 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -18,6 +18,9 @@ #define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) #define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +/* Just the flags we need, copied from mm.h: */ +#define FOLL_WRITE 0x01 /* check pte is writable */ + struct 
gup_benchmark { __u64 get_delta_usec; __u64 put_delta_usec; @@ -37,7 +40,7 @@ int main(int argc, char **argv) char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:tTLUSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) { switch (opt) { case 'm': size = atoi(optarg) * MB; @@ -71,7 +74,7 @@ int main(int argc, char **argv) flags |= MAP_SHARED; break; case 'H': - flags |= MAP_HUGETLB; + flags |= (MAP_HUGETLB | MAP_ANONYMOUS); break; default: return -1; @@ -85,7 +88,8 @@ int main(int argc, char **argv) } gup.nr_pages_per_call = nr_pages; - gup.flags = write; + if (write) + gup.flags |= FOLL_WRITE; fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); if (fd == -1) diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 951c507a27f7..a692ea828317 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -58,6 +58,14 @@ else exit 1 fi +#filter 64bit architectures +ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64" +if [ -z $ARCH ]; then + ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'` +fi +VADDR64=0 +echo "$ARCH64STR" | grep $ARCH && VADDR64=1 + mkdir $mnt mount -t hugetlbfs none $mnt @@ -189,6 +197,7 @@ else echo "[PASS]" fi +if [ $VADDR64 -ne 0 ]; then echo "-----------------------------" echo "running virtual_address_range" echo "-----------------------------" @@ -210,6 +219,7 @@ if [ $? 
-ne 0 ]; then else echo "[PASS]" fi +fi # VADDR64 echo "------------------------------------" echo "running vmalloc stability smoke test" diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index c2333c78cf04..f45e510500c0 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -19,7 +19,7 @@ int fd; const char v = 'V'; -static const char sopts[] = "bdehp:t:Tn:NL"; +static const char sopts[] = "bdehp:t:Tn:NLf:i"; static const struct option lopts[] = { {"bootstatus", no_argument, NULL, 'b'}, {"disable", no_argument, NULL, 'd'}, @@ -31,6 +31,8 @@ static const struct option lopts[] = { {"pretimeout", required_argument, NULL, 'n'}, {"getpretimeout", no_argument, NULL, 'N'}, {"gettimeleft", no_argument, NULL, 'L'}, + {"file", required_argument, NULL, 'f'}, + {"info", no_argument, NULL, 'i'}, {NULL, no_argument, NULL, 0x0} }; @@ -69,16 +71,20 @@ static void term(int sig) static void usage(char *progname) { printf("Usage: %s [options]\n", progname); - printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n"); - printf(" -d, --disable Turn off the watchdog timer\n"); - printf(" -e, --enable Turn on the watchdog timer\n"); - printf(" -h, --help Print the help message\n"); - printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE); - printf(" -t, --timeout=T Set timeout to T seconds\n"); - printf(" -T, --gettimeout Get the timeout\n"); - printf(" -n, --pretimeout=T Set the pretimeout to T seconds\n"); - printf(" -N, --getpretimeout Get the pretimeout\n"); - printf(" -L, --gettimeleft Get the time left until timer expires\n"); + printf(" -f, --file\t\tOpen watchdog device file\n"); + printf("\t\t\tDefault is /dev/watchdog\n"); + printf(" -i, --info\t\tShow watchdog_info\n"); + printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n"); + printf(" -d, --disable\t\tTurn off the watchdog timer\n"); + printf(" 
-e, --enable\t\tTurn on the watchdog timer\n"); + printf(" -h, --help\t\tPrint the help message\n"); + printf(" -p, --pingrate=P\tSet ping rate to P seconds (default %d)\n", + DEFAULT_PING_RATE); + printf(" -t, --timeout=T\tSet timeout to T seconds\n"); + printf(" -T, --gettimeout\tGet the timeout\n"); + printf(" -n, --pretimeout=T\tSet the pretimeout to T seconds\n"); + printf(" -N, --getpretimeout\tGet the pretimeout\n"); + printf(" -L, --gettimeleft\tGet the time left until timer expires\n"); printf("\n"); printf("Parameters are parsed left-to-right in real-time.\n"); printf("Example: %s -d -t 10 -p 5 -e\n", progname); @@ -92,14 +98,21 @@ int main(int argc, char *argv[]) int ret; int c; int oneshot = 0; + char *file = "/dev/watchdog"; + struct watchdog_info info; setbuf(stdout, NULL); - fd = open("/dev/watchdog", O_WRONLY); + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { + if (c == 'f') + file = optarg; + } + + fd = open(file, O_WRONLY); if (fd == -1) { if (errno == ENOENT) - printf("Watchdog device not enabled.\n"); + printf("Watchdog device (%s) not found.\n", file); else if (errno == EACCES) printf("Run watchdog as root.\n"); else @@ -108,6 +121,18 @@ int main(int argc, char *argv[]) exit(-1); } + /* + * Validate that `file` is a watchdog device + */ + ret = ioctl(fd, WDIOC_GETSUPPORT, &info); + if (ret) { + printf("WDIOC_GETSUPPORT error '%s'\n", strerror(errno)); + close(fd); + exit(ret); + } + + optind = 0; + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { switch (c) { case 'b': @@ -190,6 +215,21 @@ int main(int argc, char *argv[]) else printf("WDIOC_GETTIMELEFT error '%s'\n", strerror(errno)); break; + case 'f': + /* Handled above */ + break; + case 'i': + /* + * watchdog_info was obtained as part of file open + * validation. So we just show it here. 
+ */ + oneshot = 1; + printf("watchdog_info:\n"); + printf(" identity:\t\t%s\n", info.identity); + printf(" firmware_version:\t%u\n", + info.firmware_version); + printf(" options:\t\t%08x\n", info.options); + break; default: usage(argv[0]); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh new file mode 100755 index 000000000000..138d46b3f330 --- /dev/null +++ b/tools/testing/selftests/wireguard/netns.sh @@ -0,0 +1,550 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +# +# This script tests the below topology: +# +# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│ +# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││ +# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│ +# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││ +# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││ +# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│ +# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘ +# └──────────────────────────────────┘ +# +# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the +# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0 +# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further +# details on how this is accomplished. 
+set -e + +exec 3>&1 +export LANG=C +export WG_HIDE_KEYS=never +netns0="wg-test-$$-0" +netns1="wg-test-$$-1" +netns2="wg-test-$$-2" +pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; } +pp() { pretty "" "$*"; "$@"; } +maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; } +n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; } +n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; } +n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; } +ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } +ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } +ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } +sleep() { read -t "$1" -N 1 || true; } +waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } + +cleanup() { + set +e + exec 2>/dev/null + printf "$orig_message_cost" > /proc/sys/net/core/message_cost + ip0 link del dev wg0 + ip1 link del dev wg0 + ip2 link del dev wg0 + local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)" + [[ -n $to_kill ]] && kill $to_kill + pp ip netns del $netns1 + pp ip netns del $netns2 + pp ip netns del $netns0 + exit +} + +orig_message_cost="$(< /proc/sys/net/core/message_cost)" +trap cleanup EXIT +printf 0 > /proc/sys/net/core/message_cost + +ip netns del $netns0 2>/dev/null || true +ip netns del $netns1 2>/dev/null || true +ip netns del $netns2 2>/dev/null || true +pp ip netns add $netns0 +pp ip netns add $netns1 +pp ip netns add $netns2 +ip0 link set up dev lo + +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns 
$netns1 +ip0 link add dev wg0 type wireguard +ip0 link set wg0 netns $netns2 +key1="$(pp wg genkey)" +key2="$(pp wg genkey)" +key3="$(pp wg genkey)" +pub1="$(pp wg pubkey <<<"$key1")" +pub2="$(pp wg pubkey <<<"$key2")" +pub3="$(pp wg pubkey <<<"$key3")" +psk="$(pp wg genpsk)" +[[ -n $key1 && -n $key2 && -n $psk ]] + +configure_peers() { + ip1 addr add 192.168.241.1/24 dev wg0 + ip1 addr add fd00::1/24 dev wg0 + + ip2 addr add 192.168.241.2/24 dev wg0 + ip2 addr add fd00::2/24 dev wg0 + + n1 wg set wg0 \ + private-key <(echo "$key1") \ + listen-port 1 \ + peer "$pub2" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.2/32,fd00::2/128 + n2 wg set wg0 \ + private-key <(echo "$key2") \ + listen-port 2 \ + peer "$pub1" \ + preshared-key <(echo "$psk") \ + allowed-ips 192.168.241.1/32,fd00::1/128 + + ip1 link set up dev wg0 + ip2 link set up dev wg0 +} +configure_peers + +tests() { + # Ping over IPv4 + n2 ping -c 10 -f -W 1 192.168.241.1 + n1 ping -c 10 -f -W 1 192.168.241.2 + + # Ping over IPv6 + n2 ping6 -c 10 -f -W 1 fd00::1 + n1 ping6 -c 10 -f -W 1 fd00::2 + + # TCP over IPv4 + n2 iperf3 -s -1 -B 192.168.241.2 & + waitiperf $netns2 $! + n1 iperf3 -Z -t 3 -c 192.168.241.2 + + # TCP over IPv6 + n1 iperf3 -s -1 -B fd00::1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -c fd00::1 + + # UDP over IPv4 + n1 iperf3 -s -1 -B 192.168.241.1 & + waitiperf $netns1 $! + n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1 + + # UDP over IPv6 + n2 iperf3 -s -1 -B fd00::2 & + waitiperf $netns2 $! 
+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 +} + +[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" +big_mtu=$(( 34816 - 1500 + $orig_mtu )) + +# Test using IPv4 as outer transport +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +# Before calling tests, we first make sure that the stats counters and timestamper are working +n2 ping -c 10 -f -W 1 192.168.241.1 +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer) +(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) )) +read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer) +(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) )) +read _ timestamp < <(n1 wg show wg0 latest-handshakes) +(( timestamp != 0 )) + +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +ip1 link set wg0 mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv6 as outer transport +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n2 wg set wg0 peer "$pub1" endpoint [::1]:1 +tests +ip1 link set wg0 mtu $big_mtu +ip2 link set wg0 mtu $big_mtu +tests + +# Test that route MTUs work with the padding +ip1 link set wg0 mtu 1300 +ip2 link set wg0 mtu 1300 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1 +n0 iptables -A INPUT -m length --length 1360 -j DROP +n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299 +n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299 +n2 ping -c 1 -W 1 -s 1269 192.168.241.1 +n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299 +n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299 +n0 iptables -F INPUT + +ip1 link set wg0 
mtu $orig_mtu +ip2 link set wg0 mtu $orig_mtu + +# Test using IPv4 that roaming works +ip0 -4 addr del 127.0.0.1/8 dev lo +ip0 -4 addr add 127.212.121.99/8 dev lo +n1 wg set wg0 listen-port 9999 +n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2 +n1 ping6 -W 1 -c 1 fd00::2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]] + +# Test using IPv6 that roaming works +n1 wg set wg0 listen-port 9998 +n1 wg set wg0 peer "$pub2" endpoint [::1]:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]] + +# Test that crypto-RP filter works +n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]] +kill $ncat_pid +more_specific_key="$(pp wg genkey | pp wg pubkey)" +n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32 +n2 wg set wg0 listen-port 9997 +exec 4< <(n1 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns1 $ncat_pid +n2 ncat -u 192.168.241.1 1111 <<<"X" +! read -r -N 1 -t 1 out <&4 || false +kill $ncat_pid +n1 wg set wg0 peer "$more_specific_key" remove +[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]] + +# Test that we can change private keys and immediately handshake +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2 +n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 private-key <(echo "$key3") +n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove +n1 ping -W 1 -c 1 192.168.241.2 + +ip1 link del wg0 +ip2 link del wg0 + +# Test using NAT. 
We now change the topology to this: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │ +# │ │ │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │ +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │ +# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers + +ip0 link add vethrc type veth peer name vethc +ip0 link add vethrs type veth peer name veths +ip0 link set vethc netns $netns1 +ip0 link set veths netns $netns2 +ip0 link set vethrc up +ip0 link set vethrs up +ip0 addr add 192.168.1.1/24 dev vethrc +ip0 addr add 10.0.0.1/24 dev vethrs +ip1 addr add 192.168.1.100/24 dev vethc +ip1 link set vethc up +ip1 route add default via 192.168.1.1 +ip2 addr add 10.0.0.100/24 dev veths +ip2 link set veths up +waitiface $netns0 vethrc +waitiface $netns0 vethrs +waitiface $netns1 vethc +waitiface $netns2 veths + +n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' +n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' +n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 + +n1 wg set 
wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1 +n1 ping -W 1 -c 1 192.168.241.2 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`). +pp sleep 3 +n2 ping -W 1 -c 1 192.168.241.1 +n1 wg set wg0 peer "$pub2" persistent-keepalive 0 + +# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs. +ip1 -6 addr add fc00::9/96 dev vethc +ip1 -6 route add default via fc00::1 +ip2 -4 addr add 192.168.99.7/32 dev wg0 +ip2 -6 addr add abab::1111/128 dev wg0 +n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111 +ip1 -6 route add default dev wg0 table 51820 +ip1 -6 rule add not fwmark 51820 table 51820 +ip1 -6 rule add table main suppress_prefixlength 0 +ip1 -4 route add default dev wg0 table 51820 +ip1 -4 rule add not fwmark 51820 table 51820 +ip1 -4 rule add table main suppress_prefixlength 0 +# Flood the pings instead of sending just one, to trigger routing table reference counting bugs. +n1 ping -W 1 -c 100 -f 192.168.99.7 +n1 ping -W 1 -c 100 -f abab::1111 + +# Have ns2 NAT into wg0 packets from ns0, but return an icmp error along the right route. +n2 iptables -t nat -A POSTROUTING -s 10.0.0.0/24 -d 192.168.241.0/24 -j SNAT --to 192.168.241.2 +n0 iptables -t filter -A INPUT \! -s 10.0.0.0/24 -i vethrs -j DROP # Manual rpfilter just to be explicit. +n2 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +ip0 -4 route add 192.168.241.1 via 10.0.0.100 +n2 wg set wg0 peer "$pub1" remove +[[ $(! 
n0 ping -W 1 -c 1 192.168.241.1 || false) == *"From 10.0.0.100 icmp_seq=1 Destination Host Unreachable"* ]] + +n0 iptables -t nat -F +n0 iptables -t filter -F +n2 iptables -t nat -F +ip0 link del vethrc +ip0 link del vethrs +ip1 link del wg0 +ip2 link del wg0 + +# Test that saddr routing is sticky but not too sticky, changing to this topology: +# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐ +# │ $ns1 namespace │ │ $ns2 namespace │ +# │ │ │ │ +# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │ +# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │ +# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │ +# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │ +# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │ +# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │ +# └────────────────────────────────────────┘ └────────────────────────────────────────┘ + +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad' +n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad' +n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad' +n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries' + +# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip2 addr add fd00:aa::2/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add 10.0.0.10/24 dev veth1 +ip1 
addr del 10.0.0.1/24 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n1 ping -W 1 -c 1 192.168.241.2 +ip1 addr add fd00:aa::10/96 dev veth1 +ip1 addr del fd00:aa::1/96 dev veth1 +n1 ping -W 1 -c 1 192.168.241.2 + +# Now we show that we can successfully do reply to sender routing +ip1 link set veth1 down +ip2 link set veth2 down +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip1 addr add 10.0.0.2/24 dev veth1 +ip1 addr add fd00:aa::1/96 dev veth1 +ip1 addr add fd00:aa::2/96 dev veth1 +ip2 addr add 10.0.0.3/24 dev veth2 +ip2 addr add fd00:aa::3/96 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]] +n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]] +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]] + +# What happens if the inbound destination address belongs to a different interface than the default route? 
+ip1 link add dummy0 type dummy +ip1 addr add 10.50.0.1/24 dev dummy0 +ip1 link set dummy0 up +ip2 route add 10.50.0.0/24 dev veth2 +n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1 +n2 ping -W 1 -c 1 192.168.241.1 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]] + +ip1 link del dummy0 +ip1 addr flush dev veth1 +ip2 addr flush dev veth2 +ip1 route flush dev veth1 +ip2 route flush dev veth2 + +# Now we see what happens if another interface route takes precedence over an ongoing one +ip1 link add veth3 type veth peer name veth4 +ip1 link set veth4 netns $netns2 +ip1 addr add 10.0.0.1/24 dev veth1 +ip2 addr add 10.0.0.2/24 dev veth2 +ip1 addr add 10.0.0.3/24 dev veth3 +ip1 link set veth1 up +ip2 link set veth2 up +ip1 link set veth3 up +ip2 link set veth4 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +waitiface $netns1 veth3 +waitiface $netns2 veth4 +ip1 route flush dev veth1 +ip1 route flush dev veth3 +ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2 +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2 +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]] +ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1 +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter' +n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' +n1 ping -W 1 -c 1 192.168.241.2 +[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] + +ip1 link del veth1 +ip1 link del veth3 +ip1 link del wg0 +ip2 link del wg0 + +# We test that Netlink/IPC is working properly by doing things that usually cause split responses +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" ) +for a in {1..255}; do + for b in {0..255}; do + config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 
+for ip in $(n0 wg show wg0 allowed-ips); do + ((++i)) +done +((i == 255*256*2+1)) +ip0 link del wg0 +ip0 link add dev wg0 type wireguard +config=( "[Interface]" "PrivateKey=$(wg genkey)" ) +for a in {1..40}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) + for b in {1..52}; do + config+=( "AllowedIPs=$a.$b.0.0/16" ) + done +done +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +i=0 +while read -r line; do + j=0 + for ip in $line; do + ((++j)) + done + ((j == 53)) + ((++i)) +done < <(n0 wg show wg0 allowed-ips) +((i == 40)) +ip0 link del wg0 +ip0 link add wg0 type wireguard +config=( ) +for i in {1..29}; do + config+=( "[Peer]" "PublicKey=$(wg genkey)" ) +done +config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" ) +n0 wg setconf wg0 <(printf '%s\n' "${config[@]}") +n0 wg showconf wg0 > /dev/null +ip0 link del wg0 + +allowedips=( ) +for i in {1..197}; do + allowedips+=( abcd::$i ) +done +saved_ifs="$IFS" +IFS=, +allowedips="${allowedips[*]}" +IFS="$saved_ifs" +ip0 link add wg0 type wireguard +n0 wg set wg0 peer "$pub1" +n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips" +{ + read -r pub allowedips + [[ $pub == "$pub1" && $allowedips == "(none)" ]] + read -r pub allowedips + [[ $pub == "$pub2" ]] + i=0 + for _ in $allowedips; do + ((++i)) + done + ((i == 197)) +} < <(n0 wg show wg0 allowed-ips) +ip0 link del wg0 + +! 
n0 wg show doesnotexist || false + +ip0 link add wg0 type wireguard +n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") +[[ $(n0 wg show wg0 private-key) == "$key1" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]] +n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null +[[ $(n0 wg show wg0 private-key) == "(none)" ]] +[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]] +n0 wg set wg0 peer "$pub2" +n0 wg set wg0 private-key <(echo "$key2") +[[ $(n0 wg show wg0 public-key) == "$pub2" ]] +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 peer "$pub2" +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") +n0 wg set wg0 peer "$pub2" +[[ $(n0 wg show wg0 peers) == "$pub2" ]] +n0 wg set wg0 private-key <(echo "/${key1:1}") +[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]] +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16 +n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 +n0 wg set wg0 peer "$pub2" allowed-ips ::/0 +n0 wg set wg0 peer "$pub2" remove +low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) +n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } +[[ -z $(n0 wg show wg0 peers) ]] +n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } +[[ -z $(n0 wg show wg0 peers) ]] +ip0 link del wg0 + +declare -A objects +while read -t 0.1 -r line 2>/dev/null || [[ $? 
-ne 142 ]]; do + [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue + objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}" +done < /dev/kmsg +alldeleted=1 +for object in "${!objects[@]}"; do + if [[ ${objects["$object"]} != *createddestroyed ]]; then + echo "Error: $object: merely ${objects["$object"]}" >&3 + alldeleted=0 + fi +done +[[ $alldeleted -eq 1 ]] +pretty "" "Objects that were created were also destroyed." diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore new file mode 100644 index 000000000000..415b542a9d59 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/.gitignore @@ -0,0 +1,2 @@ +build/ +distfiles/ diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile new file mode 100644 index 000000000000..f10aa3590adc --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -0,0 +1,387 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + +PWD := $(shell pwd) + +CHOST := $(shell gcc -dumpmachine) +HOST_ARCH := $(firstword $(subst -, ,$(CHOST))) +ifneq (,$(ARCH)) +CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc)))))) +ifeq (,$(CBUILD)) +$(error The toolchain for $(ARCH) is not installed) +endif +else +CBUILD := $(CHOST) +ARCH := $(firstword $(subst -, ,$(CBUILD))) +endif + +# Set these from the environment to override +KERNEL_PATH ?= $(PWD)/../../../../.. 
+BUILD_PATH ?= $(PWD)/build/$(ARCH) +DISTFILES_PATH ?= $(PWD)/distfiles +NR_CPUS ?= 4 + +MIRROR := https://download.wireguard.com/qemu-test/distfiles/ + +default: qemu + +# variable name, tarball project name, version, tarball extension, default URI base +define tar_download = +$(1)_VERSION := $(3) +$(1)_NAME := $(2)-$$($(1)_VERSION) +$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4) +$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME) +$(call file_download,$$($(1)_NAME)$(4),$(5),$(6)) +endef + +define file_download = +$(DISTFILES_PATH)/$(1): + mkdir -p $(DISTFILES_PATH) + flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp' + if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi +endef + +$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) +$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81)) +$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) +$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) +$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) +$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c)) +$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa)) +$(eval $(call 
tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a)) +$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f)) + +KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) +WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) + +export CFLAGS ?= -O3 -pipe +export LDFLAGS ?= +export CPPFLAGS := -I$(BUILD_PATH)/include + +ifeq ($(HOST_ARCH),$(ARCH)) +CROSS_COMPILE_FLAG := --host=$(CHOST) +CFLAGS += -march=native +STRIP := strip +else +$(info Cross compilation: building for $(CBUILD) using $(CHOST)) +CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) +export CROSS_COMPILE=$(CBUILD)- +STRIP := $(CBUILD)-strip +endif +ifeq ($(ARCH),aarch64) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),aarch64_be) +QEMU_ARCH := aarch64 +KERNEL_ARCH := arm64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a53 -machine virt +CFLAGS += -march=armv8-a -mtune=cortex-a53 +endif +else ifeq ($(ARCH),arm) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += 
-march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux +endif +else ifeq ($(ARCH),armeb) +QEMU_ARCH := arm +KERNEL_ARCH := arm +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm +else +QEMU_MACHINE := -cpu cortex-a15 -machine virt +CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. +LDFLAGS += -Wl,--be8 +endif +else ifeq ($(ARCH),x86_64) +QEMU_ARCH := x86_64 +KERNEL_ARCH := x86_64 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu Skylake-Server -machine q35 +CFLAGS += -march=skylake-avx512 +endif +else ifeq ($(ARCH),i686) +QEMU_ARCH := i386 +KERNEL_ARCH := x86 +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) +QEMU_MACHINE := -cpu host -machine q35,accel=kvm +else +QEMU_MACHINE := -cpu coreduo -machine q35 +CFLAGS += -march=prescott +endif +else ifeq ($(ARCH),mips64) +QEMU_ARCH := mips64 +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EB +endif +else ifeq ($(ARCH),mips64el) +QEMU_ARCH := mips64el +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1 +CFLAGS += -march=mips64r2 -EL +endif +else ifeq ($(ARCH),mips) +QEMU_ARCH := mips +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EB +else +QEMU_MACHINE := -cpu 24Kf -machine malta 
-smp 1 +CFLAGS += -march=mips32r2 -EB +endif +else ifeq ($(ARCH),mipsel) +QEMU_ARCH := mipsel +KERNEL_ARCH := mips +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host -machine malta,accel=kvm +CFLAGS += -EL +else +QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1 +CFLAGS += -march=mips32r2 -EL +endif +else ifeq ($(ARCH),powerpc64le) +QEMU_ARCH := ppc64 +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine pseries +else +QEMU_MACHINE := -machine pseries +endif +CFLAGS += -mcpu=powerpc64le -mlong-double-64 +else ifeq ($(ARCH),powerpc) +QEMU_ARCH := ppc +KERNEL_ARCH := powerpc +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500 +else +QEMU_MACHINE := -machine ppce500 +endif +CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt +else ifeq ($(ARCH),m68k) +QEMU_ARCH := m68k +KERNEL_ARCH := m68k +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) +ifeq ($(HOST_ARCH),$(ARCH)) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +else +QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +endif +else +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k) +endif + +REAL_CC := $(CBUILD)-gcc +MUSL_CC := $(BUILD_PATH)/musl-gcc +export CC := $(MUSL_CC) +USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed + +build: $(KERNEL_BZIMAGE) +qemu: $(KERNEL_BZIMAGE) + rm -f $(BUILD_PATH)/result + timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ + -nodefaults \ + -nographic \ + -smp $(NR_CPUS) \ + $(QEMU_MACHINE) \ + -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ + 
-serial stdio \ + -serial file:$(BUILD_PATH)/result \ + -no-reboot \ + -monitor none \ + -kernel $< + grep -Fq success $(BUILD_PATH)/result + +$(BUILD_PATH)/init-cpio-spec.txt: + mkdir -p $(BUILD_PATH) + echo "file /init $(BUILD_PATH)/init 755 0 0" > $@ + echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@ + echo "dir /dev 755 0 0" >> $@ + echo "nod /dev/console 644 0 0 c 5 1" >> $@ + echo "dir /bin 755 0 0" >> $@ + echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@ + echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@ + echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@ + echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@ + echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@ + echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@ + echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@ + echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@ + echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@ + echo "slink /bin/ping6 ping 777 0 0" >> $@ + echo "dir /lib 755 0 0" >> $@ + echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@ + echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@ + +$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config + mkdir -p $(KERNEL_BUILD_PATH) + cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config + printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config + cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig + cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config + $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n 
$(KERNEL_BUILD_PATH)/.config debug.config,) + +$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) + +$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install + touch $@ + +$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD) + $(MAKE) -C $(MUSL_PATH) + $(STRIP) -s $@ + +$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so + $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers + touch $@ + +$(MUSL_CC): $(MUSL_PATH)/lib/libc.so + sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs + printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc + chmod +x $(BUILD_PATH)/musl-gcc + +$(IPERF_PATH)/.installed: $(IPERF_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h + sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile* + touch $@ + +$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS) + cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no + $(MAKE) -C $(IPERF_PATH) + $(STRIP) -s $@ + 
+$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS) + cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared + $(MAKE) -C $(LIBMNL_PATH) + sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc + +$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg + $(STRIP) -s $@ + +$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS) + mkdir -p $(BUILD_PATH) + $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $< + $(STRIP) -s $@ + +$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS) + sed -i /atexit/d $(IPUTILS_PATH)/ping.c + cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS) + $(STRIP) -s $@ + +$(BASH_PATH)/.installed: $(BASH_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS) + cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble + $(MAKE) -C $(BASH_PATH) + $(STRIP) -s $@ + +$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + printf 
'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk + printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile + touch $@ + +$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip + $(STRIP) -s $(IPROUTE2_PATH)/ip/ip + +$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss + $(STRIP) -s $(IPROUTE2_PATH)/misc/ss + +$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure + touch $@ + +$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS) + cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include + $(MAKE) -C $(IPTABLES_PATH) + $(STRIP) -s $@ + +$(NMAP_PATH)/.installed: $(NMAP_TAR) + mkdir -p $(BUILD_PATH) + flock -s $<.lock tar -C $(BUILD_PATH) -xf $< + touch $@ + +$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS) + cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff 
--without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh + $(MAKE) -C $(NMAP_PATH)/libpcap + $(MAKE) -C $(NMAP_PATH)/ncat + $(STRIP) -s $@ + +clean: + rm -rf $(BUILD_PATH) + +distclean: clean + rm -rf $(DISTFILES_PATH) + +menuconfig: $(KERNEL_BUILD_PATH)/.config + $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig + +.PHONY: qemu build clean distclean menuconfig +.DELETE_ON_ERROR: diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config new file mode 100644 index 000000000000..3d063bb247bb --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config new file mode 100644 index 000000000000..dbdc7e406a7b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config @@ -0,0 +1,6 @@ +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config new file mode 100644 index 000000000000..148f49905418 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y 
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config new file mode 100644 index 000000000000..bd76b07d00a2 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -0,0 +1,10 @@ +CONFIG_MMU=y +CONFIG_ARCH_MULTI_V7=y +CONFIG_ARCH_VIRT=y +CONFIG_THUMB2_KERNEL=n +CONFIG_SERIAL_AMBA_PL011=y +CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config new file mode 100644 index 000000000000..a85025d7206e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config new file mode 100644 index 000000000000..62a15bdb877e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -0,0 +1,9 @@ +CONFIG_MMU=y +CONFIG_M68KCLASSIC=y +CONFIG_M68040=y +CONFIG_MAC=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config new file mode 100644 index 000000000000..df71d6b95546 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -0,0 +1,11 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y 
+CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config new file mode 100644 index 000000000000..90c783f725c4 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config @@ -0,0 +1,14 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config new file mode 100644 index 000000000000..435b0b43e00c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config @@ -0,0 +1,15 @@ +CONFIG_64BIT=y +CONFIG_CPU_MIPS64_R2=y +CONFIG_MIPS32_N32=y +CONFIG_CPU_HAS_MSA=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config new file mode 100644 index 000000000000..62bb50c4a85f --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -0,0 +1,12 @@ +CONFIG_CPU_MIPS32_R2=y +CONFIG_MIPS_MALTA=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_MIPS_CPS=y +CONFIG_MIPS_FP_SUPPORT=y +CONFIG_POWER_RESET=y +CONFIG_POWER_RESET_SYSCON=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" 
+CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config new file mode 100644 index 000000000000..57957093b71b --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -0,0 +1,10 @@ +CONFIG_PPC_QEMU_E500=y +CONFIG_FSL_SOC_BOOKE=y +CONFIG_PPC_85xx=y +CONFIG_PHYS_64BIT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_MATH_EMULATION=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config new file mode 100644 index 000000000000..990c510a9cfa --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config @@ -0,0 +1,12 @@ +CONFIG_PPC64=y +CONFIG_PPC_PSERIES=y +CONFIG_ALTIVEC=y +CONFIG_VSX=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_HVC_CONSOLE=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config new file mode 100644 index 000000000000..00a1ef4869d5 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -0,0 +1,5 @@ +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config new file mode 100644 index 000000000000..5909e7ef2a5c --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -0,0 +1,66 @@ +CONFIG_LOCALVERSION="-debug" +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_POINTER=y +CONFIG_STACK_VALIDATION=y +CONFIG_DEBUG_KERNEL=y 
+CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_PAGE_EXTENSION=y +CONFIG_PAGE_POISONING=y +CONFIG_DEBUG_OBJECTS=y +CONFIG_DEBUG_OBJECTS_FREE=y +CONFIG_DEBUG_OBJECTS_TIMERS=y +CONFIG_DEBUG_OBJECTS_WORK=y +CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y +CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1 +CONFIG_SLUB_DEBUG_ON=y +CONFIG_DEBUG_VM=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_HAVE_DEBUG_STACKOVERFLOW=y +CONFIG_DEBUG_STACKOVERFLOW=y +CONFIG_HAVE_ARCH_KMEMCHECK=y +CONFIG_HAVE_ARCH_KASAN=y +CONFIG_KASAN=y +CONFIG_KASAN_INLINE=y +CONFIG_UBSAN=y +CONFIG_UBSAN_SANITIZE_ALL=y +CONFIG_UBSAN_NO_ALIGNMENT=y +CONFIG_UBSAN_NULL=y +CONFIG_DEBUG_KMEMLEAK=y +CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 +CONFIG_DEBUG_STACK_USAGE=y +CONFIG_DEBUG_SHIRQ=y +CONFIG_WQ_WATCHDOG=y +CONFIG_SCHED_DEBUG=y +CONFIG_SCHED_INFO=y +CONFIG_SCHEDSTATS=y +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_DEBUG_TIMEKEEPING=y +CONFIG_TIMER_STATS=y +CONFIG_DEBUG_PREEMPT=y +CONFIG_DEBUG_RT_MUTEXES=y +CONFIG_DEBUG_SPINLOCK=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y +CONFIG_LOCKDEP=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_TRACE_IRQFLAGS=y +CONFIG_DEBUG_BUGVERBOSE=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_PI_LIST=y +CONFIG_PROVE_RCU=y +CONFIG_SPARSE_RCU_POINTER=y +CONFIG_RCU_CPU_STALL_TIMEOUT=21 +CONFIG_RCU_TRACE=y +CONFIG_RCU_EQS_DEBUG=y +CONFIG_USER_STACKTRACE_SUPPORT=y +CONFIG_DEBUG_SG=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DOUBLEFAULT=y +CONFIG_X86_DEBUG_FPU=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y +CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c new file mode 100644 index 000000000000..90bc9813cadc --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 
+ */ + +#define _GNU_SOURCE +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <fcntl.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/io.h> +#include <sys/ioctl.h> +#include <sys/reboot.h> +#include <sys/utsname.h> +#include <sys/sendfile.h> +#include <sys/sysmacros.h> +#include <linux/random.h> +#include <linux/version.h> + +__attribute__((noreturn)) static void poweroff(void) +{ + fflush(stdout); + fflush(stderr); + reboot(RB_AUTOBOOT); + sleep(30); + fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n"); + exit(1); +} + +static void panic(const char *what) +{ + fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno)); + poweroff(); +} + +#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m") + +static void print_banner(void) +{ + struct utsname utsname; + int len; + + if (uname(&utsname) < 0) + panic("uname"); + + len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine); + printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, ""); +} + +static void seed_rng(void) +{ + int fd; + struct { + int entropy_count; + int buffer_size; + unsigned char buffer[256]; + } entropy = { + .entropy_count = sizeof(entropy.buffer) * 8, + .buffer_size = sizeof(entropy.buffer), + .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" 
+ }; + + if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) + panic("mknod(/dev/urandom)"); + fd = open("/dev/urandom", O_WRONLY); + if (fd < 0) + panic("open(urandom)"); + for (int i = 0; i < 256; ++i) { + if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) + panic("ioctl(urandom)"); + } + close(fd); +} + +static void mount_filesystems(void) +{ + pretty_message("[+] Mounting filesystems..."); + mkdir("/dev", 0755); + mkdir("/proc", 0755); + mkdir("/sys", 0755); + mkdir("/tmp", 0755); + mkdir("/run", 0755); + mkdir("/var", 0755); + if (mount("none", "/dev", "devtmpfs", 0, NULL)) + panic("devtmpfs mount"); + if (mount("none", "/proc", "proc", 0, NULL)) + panic("procfs mount"); + if (mount("none", "/sys", "sysfs", 0, NULL)) + panic("sysfs mount"); + if (mount("none", "/tmp", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/run", "tmpfs", 0, NULL)) + panic("tmpfs mount"); + if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL)) + ; /* Not a problem if it fails.*/ + if (symlink("/run", "/var/run")) + panic("run symlink"); + if (symlink("/proc/self/fd", "/dev/fd")) + panic("fd symlink"); +} + +static void enable_logging(void) +{ + int fd; + pretty_message("[+] Enabling logging..."); + fd = open("/proc/sys/kernel/printk", O_WRONLY); + if (fd >= 0) { + if (write(fd, "9\n", 2) != 2) + panic("write(printk)"); + close(fd); + } + fd = open("/proc/sys/debug/exception-trace", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(exception-trace)"); + close(fd); + } + fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); + if (fd >= 0) { + if (write(fd, "1\n", 2) != 2) + panic("write(panic_on_warn)"); + close(fd); + } +} + +static void kmod_selftests(void) +{ + FILE *file; + char line[2048], *start, *pass; + bool success = true; + pretty_message("[+] Module self-tests:"); + file = fopen("/proc/kmsg", "r"); + if (!file) + panic("fopen(kmsg)"); + if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0) + panic("fcntl(kmsg, nonblock)"); + while 
(fgets(line, sizeof(line), file)) { + start = strstr(line, "wireguard: "); + if (!start) + continue; + start += 11; + *strchrnul(start, '\n') = '\0'; + if (strstr(start, "www.wireguard.com")) + break; + pass = strstr(start, ": pass"); + if (!pass || pass[6] != '\0') { + success = false; + printf(" \x1b[31m* %s\x1b[0m\n", start); + } else + printf(" \x1b[32m* %s\x1b[0m\n", start); + } + fclose(file); + if (!success) { + puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m"); + poweroff(); + } +} + +static void launch_tests(void) +{ + char cmdline[4096], *success_dev; + int status, fd; + pid_t pid; + + pretty_message("[+] Launching tests..."); + pid = fork(); + if (pid == -1) + panic("fork"); + else if (pid == 0) { + execl("/init.sh", "init", NULL); + panic("exec"); + } + if (waitpid(pid, &status, 0) < 0) + panic("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + pretty_message("[+] Tests successful! :-)"); + fd = open("/proc/cmdline", O_RDONLY); + if (fd < 0) + panic("open(/proc/cmdline)"); + if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0) + panic("read(/proc/cmdline)"); + cmdline[sizeof(cmdline) - 1] = '\0'; + for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) { + if (strncmp(success_dev, "wg.success=", 11)) + continue; + memcpy(success_dev + 11 - 5, "/dev/", 5); + success_dev += 11 - 5; + break; + } + if (!success_dev || !strlen(success_dev)) + panic("Unable to find success device"); + + fd = open(success_dev, O_WRONLY); + if (fd < 0) + panic("open(success_dev)"); + if (write(fd, "success\n", 8) != 8) + panic("write(success_dev)"); + close(fd); + } else { + const char *why = "unknown cause"; + int what = -1; + + if (WIFEXITED(status)) { + why = "exit code"; + what = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + why = "signal"; + what = WTERMSIG(status); + } + printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! 
\u2639\x1b[0m\n", why, what); + } +} + +static void ensure_console(void) +{ + for (unsigned int i = 0; i < 1000; ++i) { + int fd = open("/dev/console", O_RDWR); + if (fd < 0) { + usleep(50000); + continue; + } + dup2(fd, 0); + dup2(fd, 1); + dup2(fd, 2); + close(fd); + if (write(1, "\0\0\0\0\n", 5) == 5) + return; + } + panic("Unable to open console device"); +} + +static void clear_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Starting memory leak detection..."); + write(fd, "clear\n", 5); + close(fd); +} + +static void check_leaks(void) +{ + int fd; + + fd = open("/sys/kernel/debug/kmemleak", O_WRONLY); + if (fd < 0) + return; + pretty_message("[+] Scanning for memory leaks..."); + sleep(2); /* Wait for any grace periods. */ + write(fd, "scan\n", 5); + close(fd); + + fd = open("/sys/kernel/debug/kmemleak", O_RDONLY); + if (fd < 0) + return; + if (sendfile(1, fd, NULL, 0x7ffff000) > 0) + panic("Memory leaks encountered"); + close(fd); +} + +int main(int argc, char *argv[]) +{ + seed_rng(); + ensure_console(); + print_banner(); + mount_filesystems(); + kmod_selftests(); + enable_logging(); + clear_leaks(); + launch_tests(); + check_leaks(); + poweroff(); + return 1; +} diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config new file mode 100644 index 000000000000..af9323a0b6e0 --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -0,0 +1,88 @@ +CONFIG_LOCALVERSION="" +CONFIG_NET=y +CONFIG_NETDEVICES=y +CONFIG_NET_CORE=y +CONFIG_NET_IPIP=y +CONFIG_DUMMY=y +CONFIG_VETH=y +CONFIG_MULTIUSER=y +CONFIG_NAMESPACES=y +CONFIG_NET_NS=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IPV6=y +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NF_CONNTRACK=y +CONFIG_NF_NAT=y +CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XT_NAT=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NF_CONNTRACK_IPV4=y +CONFIG_NF_NAT_IPV4=y 
+CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_NAT=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_TTY=y +CONFIG_BINFMT_ELF=y +CONFIG_BINFMT_SCRIPT=y +CONFIG_VDSO=y +CONFIG_VIRTUALIZATION=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_PARAVIRT_SPINLOCKS=y +CONFIG_PRINTK=y +CONFIG_KALLSYMS=y +CONFIG_BUG=y +CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y +CONFIG_JUMP_LABEL=y +CONFIG_EMBEDDED=n +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_SHMEM=y +CONFIG_SLUB=y +CONFIG_SPARSEMEM_VMEMMAP=y +CONFIG_SMP=y +CONFIG_SCHED_SMT=y +CONFIG_SCHED_MC=y +CONFIG_NUMA=y +CONFIG_PREEMPT=y +CONFIG_NO_HZ=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_HZ_PERIODIC=n +CONFIG_HIGH_RES_TIMERS=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_ARCH_RANDOM=y +CONFIG_FILE_LOCKING=y +CONFIG_POSIX_TIMERS=y +CONFIG_DEVTMPFS=y +CONFIG_PROC_FS=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_PRINTK_TIME=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_KERNEL_GZIP=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_BUG_ON_DATA_CORRUPTION=y +CONFIG_LOCKUP_DETECTOR=y +CONFIG_SOFTLOCKUP_DETECTOR=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_WQ_WATCHDOG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y +CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y +CONFIG_PANIC_TIMEOUT=-1 +CONFIG_STACKTRACE=y +CONFIG_EARLY_PRINTK=y +CONFIG_GDB_SCRIPTS=y +CONFIG_WIREGUARD=y +CONFIG_WIREGUARD_DEBUG=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index fa07d526fe39..5d49bfec1e9a 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,13 +11,13 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt 
test_mremap_vdso \ - check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ + check_initial_reg_state sigreturn iopl ioperm \ protection_keys test_vdso test_vsyscall mov_ss_trap \ syscall_arg_fault TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c index 01de41c1b725..57ec5e99edb9 100644 --- a/tools/testing/selftests/x86/ioperm.c +++ b/tools/testing/selftests/x86/ioperm.c @@ -131,6 +131,17 @@ int main(void) printf("[RUN]\tchild: check that we inherited permissions\n"); expect_ok(0x80); expect_gp(0xed); + printf("[RUN]\tchild: Extend permissions to 0x81\n"); + if (ioperm(0x81, 1, 1) != 0) { + printf("[FAIL]\tioperm(0x81, 1, 1) failed (%d)", errno); + return 1; + } + printf("[RUN]\tchild: Drop permissions to 0x80\n"); + if (ioperm(0x80, 1, 0) != 0) { + printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno); + return 1; + } + expect_gp(0x80); return 0; } else { int status; @@ -146,8 +157,11 @@ int main(void) } } - /* Test the capability checks. */ + /* Verify that the child dropping 0x80 did not affect the parent */ + printf("\tVerify that unsharing the bitmap worked\n"); + expect_ok(0x80); + /* Test the capability checks. 
*/ printf("\tDrop privileges\n"); if (setresuid(1, 1, 1) != 0) { printf("[WARN]\tDropping privileges failed\n"); diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c index 6aa27f34644c..bab2f6e06b63 100644 --- a/tools/testing/selftests/x86/iopl.c +++ b/tools/testing/selftests/x86/iopl.c @@ -35,6 +35,16 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), } +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + static jmp_buf jmpbuf; static void sigsegv(int sig, siginfo_t *si, void *ctx_void) @@ -42,25 +52,128 @@ static void sigsegv(int sig, siginfo_t *si, void *ctx_void) siglongjmp(jmpbuf, 1); } +static bool try_outb(unsigned short port) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("outb %%al, %w[port]" + : : [port] "Nd" (port), "a" (0)); + return true; + } + clearhandler(SIGSEGV); +} + +static void expect_ok_outb(unsigned short port) +{ + if (!try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx failed\n", port); + exit(1); + } + + printf("[OK]\toutb to 0x%02hx worked\n", port); +} + +static void expect_gp_outb(unsigned short port) +{ + if (try_outb(port)) { + printf("[FAIL]\toutb to 0x%02hx worked\n", port); + nerrs++; + } + + printf("[OK]\toutb to 0x%02hx failed\n", port); +} + +static bool try_cli(void) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("cli"); + return true; + } + clearhandler(SIGSEGV); +} + +static bool try_sti(void) +{ + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); + if (sigsetjmp(jmpbuf, 1) != 0) { + return false; + } else { + asm volatile ("sti"); + return true; + } + clearhandler(SIGSEGV); +} + +static void expect_gp_sti(void) +{ + if (try_sti()) { + printf("[FAIL]\tSTI worked\n"); + 
nerrs++; + } else { + printf("[OK]\tSTI faulted\n"); + } +} + +static void expect_gp_cli(void) +{ + if (try_cli()) { + printf("[FAIL]\tCLI worked\n"); + nerrs++; + } else { + printf("[OK]\tCLI faulted\n"); + } +} + int main(void) { cpu_set_t cpuset; + CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) err(1, "sched_setaffinity to CPU 0"); /* Probe for iopl support. Note that iopl(0) works even as nonroot. */ - if (iopl(3) != 0) { + switch(iopl(3)) { + case 0: + break; + case -ENOSYS: + printf("[OK]\tiopl() nor supported\n"); + return 0; + default: printf("[OK]\tiopl(3) failed (%d) -- try running as root\n", errno); return 0; } - /* Restore our original state prior to starting the test. */ + /* Make sure that CLI/STI are blocked even with IOPL level 3 */ + expect_gp_cli(); + expect_gp_sti(); + expect_ok_outb(0x80); + + /* Establish an I/O bitmap to test the restore */ + if (ioperm(0x80, 1, 1) != 0) + err(1, "ioperm(0x80, 1, 1) failed\n"); + + /* Restore our original state prior to starting the fork test. */ if (iopl(0) != 0) err(1, "iopl(0)"); + /* + * Verify that IOPL emulation is disabled and the I/O bitmap still + * works. + */ + expect_ok_outb(0x80); + expect_gp_outb(0xed); + /* Drop the I/O bitmap */ + if (ioperm(0x80, 1, 0) != 0) + err(1, "ioperm(0x80, 1, 0) failed\n"); + pid_t child = fork(); if (child == -1) err(1, "fork"); @@ -90,14 +203,9 @@ int main(void) printf("[RUN]\tparent: write to 0x80 (should fail)\n"); - sethandler(SIGSEGV, sigsegv, 0); - if (sigsetjmp(jmpbuf, 1) != 0) { - printf("[OK]\twrite was denied\n"); - } else { - asm volatile ("outb %%al, $0x80" : : "a" (0)); - printf("[FAIL]\twrite was allowed\n"); - nerrs++; - } + expect_gp_outb(0x80); + expect_gp_cli(); + expect_gp_sti(); /* Test the capability checks. */ printf("\tiopl(3)\n"); @@ -133,4 +241,3 @@ int main(void) done: return nerrs ? 
1 : 0; } - diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c index 3c3a022654f3..6da0ac3f0135 100644 --- a/tools/testing/selftests/x86/mov_ss_trap.c +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -257,7 +257,8 @@ int main() err(1, "sigaltstack"); sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); nr = SYS_getpid; - asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) + /* Clear EBP first to make sure we segfault cleanly. */ + asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr) : [ss] "m" (ss) : "flags", "rcx" #ifdef __x86_64__ , "r11" diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h deleted file mode 100644 index 7546eba7f17a..000000000000 --- a/tools/testing/selftests/x86/mpx-debug.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MPX_DEBUG_H -#define _MPX_DEBUG_H - -#ifndef DEBUG_LEVEL -#define DEBUG_LEVEL 0 -#endif -#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0) -#define dprintf1(args...) dprintf_level(1, args) -#define dprintf2(args...) dprintf_level(2, args) -#define dprintf3(args...) dprintf_level(3, args) -#define dprintf4(args...) dprintf_level(4, args) -#define dprintf5(args...) 
dprintf_level(5, args) - -#endif /* _MPX_DEBUG_H */ diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c deleted file mode 100644 index 880fbf676968..000000000000 --- a/tools/testing/selftests/x86/mpx-dig.c +++ /dev/null @@ -1,497 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Written by Dave Hansen <dave.hansen@intel.com> - */ - -#include <stdlib.h> -#include <sys/types.h> -#include <unistd.h> -#include <stdio.h> -#include <errno.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <string.h> -#include <fcntl.h> -#include "mpx-debug.h" -#include "mpx-mm.h" -#include "mpx-hw.h" - -unsigned long bounds_dir_global; - -#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__) -static void inline __mpx_dig_abort(const char *file, const char *func, int line) -{ - fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func); - printf("MPX dig abort @ %s::%d in %s()\n", file, line, func); - abort(); -} - -/* - * run like this (BDIR finds the probably bounds directory): - * - * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \ - * | head -1 | awk -F- '{print $1}')"; - * ./mpx-dig $pid 0x$BDIR - * - * NOTE: - * assumes that the only 2097152-kb VMA is the bounds dir - */ - -long nr_incore(void *ptr, unsigned long size_bytes) -{ - int i; - long ret = 0; - long vec_len = size_bytes / PAGE_SIZE; - unsigned char *vec = malloc(vec_len); - int incore_ret; - - if (!vec) - mpx_dig_abort(); - - incore_ret = mincore(ptr, size_bytes, vec); - if (incore_ret) { - printf("mincore ret: %d\n", incore_ret); - perror("mincore"); - mpx_dig_abort(); - } - for (i = 0; i < vec_len; i++) - ret += vec[i]; - free(vec); - return ret; -} - -int open_proc(int pid, char *file) -{ - static char buf[100]; - int fd; - - snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file); - fd = open(&buf[0], O_RDONLY); - if (fd < 0) - perror(buf); - - return fd; -} - -struct vaddr_range { - unsigned long start; - unsigned long end; -}; -struct 
vaddr_range *ranges; -int nr_ranges_allocated; -int nr_ranges_populated; -int last_range = -1; - -int __pid_load_vaddrs(int pid) -{ - int ret = 0; - int proc_maps_fd = open_proc(pid, "maps"); - char linebuf[10000]; - unsigned long start; - unsigned long end; - char rest[1000]; - FILE *f = fdopen(proc_maps_fd, "r"); - - if (!f) - mpx_dig_abort(); - nr_ranges_populated = 0; - while (!feof(f)) { - char *readret = fgets(linebuf, sizeof(linebuf), f); - int parsed; - - if (readret == NULL) { - if (feof(f)) - break; - mpx_dig_abort(); - } - - parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest); - if (parsed != 3) - mpx_dig_abort(); - - dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest); - if (nr_ranges_populated >= nr_ranges_allocated) { - ret = -E2BIG; - break; - } - ranges[nr_ranges_populated].start = start; - ranges[nr_ranges_populated].end = end; - nr_ranges_populated++; - } - last_range = -1; - fclose(f); - close(proc_maps_fd); - return ret; -} - -int pid_load_vaddrs(int pid) -{ - int ret; - - dprintf2("%s(%d)\n", __func__, pid); - if (!ranges) { - nr_ranges_allocated = 4; - ranges = malloc(nr_ranges_allocated * sizeof(ranges[0])); - dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid, - nr_ranges_allocated, ranges); - assert(ranges != NULL); - } - do { - ret = __pid_load_vaddrs(pid); - if (!ret) - break; - if (ret == -E2BIG) { - dprintf2("%s(%d) need to realloc\n", __func__, pid); - nr_ranges_allocated *= 2; - ranges = realloc(ranges, - nr_ranges_allocated * sizeof(ranges[0])); - dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, - pid, nr_ranges_allocated, ranges); - assert(ranges != NULL); - dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated); - } - } while (1); - - dprintf2("%s(%d) done\n", __func__, pid); - - return ret; -} - -static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r) -{ - if (vaddr < r->start) - return 0; - if (vaddr >= r->end) - return 0; - return 1; -} - -static inline int 
vaddr_mapped_by_range(unsigned long vaddr) -{ - int i; - - if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range])) - return 1; - - for (i = 0; i < nr_ranges_populated; i++) { - struct vaddr_range *r = &ranges[i]; - - if (vaddr_in_range(vaddr, r)) - continue; - last_range = i; - return 1; - } - return 0; -} - -const int bt_entry_size_bytes = sizeof(unsigned long) * 4; - -void *read_bounds_table_into_buf(unsigned long table_vaddr) -{ -#ifdef MPX_DIG_STANDALONE - static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES]; - off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET); - if (seek_ret != table_vaddr) - mpx_dig_abort(); - - int read_ret = read(fd, &bt_buf, sizeof(bt_buf)); - if (read_ret != sizeof(bt_buf)) - mpx_dig_abort(); - return &bt_buf; -#else - return (void *)table_vaddr; -#endif -} - -int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr, - unsigned long bde_vaddr) -{ - unsigned long offset_inside_bt; - int nr_entries = 0; - int do_abort = 0; - char *bt_buf; - - dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n", - __func__, base_controlled_vaddr, bde_vaddr); - - bt_buf = read_bounds_table_into_buf(table_vaddr); - - dprintf4("%s() read done\n", __func__); - - for (offset_inside_bt = 0; - offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES; - offset_inside_bt += bt_entry_size_bytes) { - unsigned long bt_entry_index; - unsigned long bt_entry_controls; - unsigned long this_bt_entry_for_vaddr; - unsigned long *bt_entry_buf; - int i; - - dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__, - offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES); - bt_entry_buf = (void *)&bt_buf[offset_inside_bt]; - if (!bt_buf) { - printf("null bt_buf\n"); - mpx_dig_abort(); - } - if (!bt_entry_buf) { - printf("null bt_entry_buf\n"); - mpx_dig_abort(); - } - dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__, - bt_entry_buf); - if (!bt_entry_buf[0] && - !bt_entry_buf[1] && - !bt_entry_buf[2] && - !bt_entry_buf[3]) - continue; - - 
nr_entries++; - - bt_entry_index = offset_inside_bt/bt_entry_size_bytes; - bt_entry_controls = sizeof(void *); - this_bt_entry_for_vaddr = - base_controlled_vaddr + bt_entry_index*bt_entry_controls; - /* - * We sign extend vaddr bits 48->63 which effectively - * creates a hole in the virtual address space. - * This calculation corrects for the hole. - */ - if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL) - this_bt_entry_for_vaddr |= 0xffff800000000000; - - if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) { - printf("bt_entry_buf: %p\n", bt_entry_buf); - printf("there is a bte for %lx but no mapping\n", - this_bt_entry_for_vaddr); - printf(" bde vaddr: %016lx\n", bde_vaddr); - printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr); - printf(" table_vaddr: %016lx\n", table_vaddr); - printf(" entry vaddr: %016lx @ offset %lx\n", - table_vaddr + offset_inside_bt, offset_inside_bt); - do_abort = 1; - mpx_dig_abort(); - } - if (DEBUG_LEVEL < 4) - continue; - - printf("table entry[%lx]: ", offset_inside_bt); - for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long)) - printf("0x%016lx ", bt_entry_buf[i]); - printf("\n"); - } - if (do_abort) - mpx_dig_abort(); - dprintf4("%s() done\n", __func__); - return nr_entries; -} - -int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes, - int *nr_populated_bdes) -{ - unsigned long i; - int total_entries = 0; - - dprintf3("%s(%p, %x, %lx, ...) 
buf end: %p\n", __func__, buf, - len_bytes, bd_offset_bytes, buf + len_bytes); - - for (i = 0; i < len_bytes; i += sizeof(unsigned long)) { - unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long); - unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i]; - unsigned long bounds_dir_entry; - unsigned long bd_for_vaddr; - unsigned long bt_start; - unsigned long bt_tail; - int nr_entries; - - dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i, - bounds_dir_entry_ptr); - - bounds_dir_entry = *bounds_dir_entry_ptr; - if (!bounds_dir_entry) { - dprintf4("no bounds dir at index 0x%lx / 0x%lx " - "start at offset:%lx %lx\n", bd_index, bd_index, - bd_offset_bytes, i); - continue; - } - dprintf3("found bounds_dir_entry: 0x%lx @ " - "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i, - &buf[i]); - /* mask off the enable bit: */ - bounds_dir_entry &= ~0x1; - (*nr_populated_bdes)++; - dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes); - dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes); - - bt_start = bounds_dir_entry; - bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1; - if (!vaddr_mapped_by_range(bt_start)) { - printf("bounds directory 0x%lx points to nowhere\n", - bounds_dir_entry); - mpx_dig_abort(); - } - if (!vaddr_mapped_by_range(bt_tail)) { - printf("bounds directory end 0x%lx points to nowhere\n", - bt_tail); - mpx_dig_abort(); - } - /* - * Each bounds directory entry controls 1MB of virtual address - * space. This variable is the virtual address in the process - * of the beginning of the area controlled by this bounds_dir. 
- */ - bd_for_vaddr = bd_index * (1UL<<20); - - nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr, - bounds_dir_global+bd_offset_bytes+i); - total_entries += nr_entries; - dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries " - "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n", - bd_index, buf+i, - bounds_dir_entry, nr_entries, total_entries, - bd_for_vaddr, bd_for_vaddr + (1UL<<20)); - } - dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes, - bd_offset_bytes); - return total_entries; -} - -int proc_pid_mem_fd = -1; - -void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir, - long buffer_size_bytes, void *buffer) -{ - unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir; - int read_ret; - off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET); - - if (seek_ret != seekto) - mpx_dig_abort(); - - read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes); - /* there shouldn't practically be short reads of /proc/$pid/mem */ - if (read_ret != buffer_size_bytes) - mpx_dig_abort(); - - return buffer; -} -void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir, - long buffer_size_bytes, void *buffer) - -{ - unsigned char vec[buffer_size_bytes / PAGE_SIZE]; - char *dig_bounds_dir_ptr = - (void *)(bounds_dir_global + byte_offset_inside_bounds_dir); - /* - * use mincore() to quickly find the areas of the bounds directory - * that have memory and thus will be worth scanning. 
- */ - int incore_ret; - - int incore = 0; - int i; - - dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr); - - incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]); - if (incore_ret) { - printf("mincore ret: %d\n", incore_ret); - perror("mincore"); - mpx_dig_abort(); - } - for (i = 0; i < sizeof(vec); i++) - incore += vec[i]; - dprintf4("%s() total incore: %d\n", __func__, incore); - if (!incore) - return NULL; - dprintf3("%s() total incore: %d\n", __func__, incore); - return dig_bounds_dir_ptr; -} - -int inspect_pid(int pid) -{ - static int dig_nr; - long offset_inside_bounds_dir; - char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)]; - char *dig_bounds_dir_ptr; - int total_entries = 0; - int nr_populated_bdes = 0; - int inspect_self; - - if (getpid() == pid) { - dprintf4("inspecting self\n"); - inspect_self = 1; - } else { - dprintf4("inspecting pid %d\n", pid); - mpx_dig_abort(); - } - - for (offset_inside_bounds_dir = 0; - offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES; - offset_inside_bounds_dir += sizeof(bounds_dir_buf)) { - static int bufs_skipped; - int this_entries; - - if (inspect_self) { - dig_bounds_dir_ptr = - fill_bounds_dir_buf_self(offset_inside_bounds_dir, - sizeof(bounds_dir_buf), - &bounds_dir_buf[0]); - } else { - dig_bounds_dir_ptr = - fill_bounds_dir_buf_other(offset_inside_bounds_dir, - sizeof(bounds_dir_buf), - &bounds_dir_buf[0]); - } - if (!dig_bounds_dir_ptr) { - bufs_skipped++; - continue; - } - this_entries = search_bd_buf(dig_bounds_dir_ptr, - sizeof(bounds_dir_buf), - offset_inside_bounds_dir, - &nr_populated_bdes); - total_entries += this_entries; - } - printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr, - total_entries, nr_populated_bdes); - return total_entries + nr_populated_bdes; -} - -#ifdef MPX_DIG_REMOTE -int main(int argc, char **argv) -{ - int err; - char *c; - unsigned long bounds_dir_entry; - int pid; - - printf("mpx-dig starting...\n"); - err = 
sscanf(argv[1], "%d", &pid); - printf("parsing: '%s', err: %d\n", argv[1], err); - if (err != 1) - mpx_dig_abort(); - - err = sscanf(argv[2], "%lx", &bounds_dir_global); - printf("parsing: '%s': %d\n", argv[2], err); - if (err != 1) - mpx_dig_abort(); - - proc_pid_mem_fd = open_proc(pid, "mem"); - if (proc_pid_mem_fd < 0) - mpx_dig_abort(); - - inspect_pid(pid); - return 0; -} -#endif - -long inspect_me(struct mpx_bounds_dir *bounds_dir) -{ - int pid = getpid(); - - pid_load_vaddrs(pid); - bounds_dir_global = (unsigned long)bounds_dir; - dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir); - return inspect_pid(pid); -} diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h deleted file mode 100644 index d1b61ab870f8..000000000000 --- a/tools/testing/selftests/x86/mpx-hw.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MPX_HW_H -#define _MPX_HW_H - -#include <assert.h> - -/* Describe the MPX Hardware Layout in here */ - -#define NR_MPX_BOUNDS_REGISTERS 4 - -#ifdef __i386__ - -#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */ -#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */ -#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4 -#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */ - -#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2 -#define MPX_BOUNDS_TABLE_TOP_BIT 11 -#define MPX_BOUNDS_DIR_BOTTOM_BIT 12 -#define MPX_BOUNDS_DIR_TOP_BIT 31 - -#else - -/* - * Linear Address of "pointer" (LAp) - * 0 -> 2: ignored - * 3 -> 19: index in to bounds table - * 20 -> 47: index in to bounds directory - * 48 -> 63: ignored - */ - -#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32 -#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */ -#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8 -#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */ - -#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3 -#define MPX_BOUNDS_TABLE_TOP_BIT 19 -#define MPX_BOUNDS_DIR_BOTTOM_BIT 20 -#define MPX_BOUNDS_DIR_TOP_BIT 47 - -#endif - 
-#define MPX_BOUNDS_DIR_NR_ENTRIES \ - (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES) -#define MPX_BOUNDS_TABLE_NR_ENTRIES \ - (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES) - -#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1 - -struct mpx_bd_entry { - union { - char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES]; - void *contents[0]; - }; -} __attribute__((packed)); - -struct mpx_bt_entry { - union { - char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES]; - unsigned long contents[0]; - }; -} __attribute__((packed)); - -struct mpx_bounds_dir { - struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES]; -} __attribute__((packed)); - -struct mpx_bounds_table { - struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES]; -} __attribute__((packed)); - -static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit) -{ - int total_nr_bits = topbit - bottombit; - unsigned long mask = (1UL << total_nr_bits)-1; - return (val >> bottombit) & mask; -} - -static inline unsigned long __vaddr_bounds_table_index(void *vaddr) -{ - return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT, - MPX_BOUNDS_TABLE_TOP_BIT); -} - -static inline unsigned long __vaddr_bounds_directory_index(void *vaddr) -{ - return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT, - MPX_BOUNDS_DIR_TOP_BIT); -} - -static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr, - struct mpx_bounds_dir *bounds_dir) -{ - unsigned long index = __vaddr_bounds_directory_index(vaddr); - return &bounds_dir->entries[index]; -} - -static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry) -{ - unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents; - return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT); -} - -static inline struct mpx_bounds_table * -__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry) -{ - unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents; - assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT); - 
__bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT; - return (struct mpx_bounds_table *)__bd_entry; -} - -static inline struct mpx_bt_entry * -mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir) -{ - struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir); - struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde); - unsigned long index = __vaddr_bounds_table_index(vaddr); - return &bt->entries[index]; -} - -#endif /* _MPX_HW_H */ diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c deleted file mode 100644 index 23ddd453f362..000000000000 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ /dev/null @@ -1,1613 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions) - * - * Written by: - * "Ren, Qiaowei" <qiaowei.ren@intel.com> - * "Wei, Gang" <gang.wei@intel.com> - * "Hansen, Dave" <dave.hansen@intel.com> - */ - -/* - * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure - * it works on 32-bit. 
- */ - -int inspect_every_this_many_mallocs = 100; -int zap_all_every_this_many_mallocs = 1000; - -#define _GNU_SOURCE -#define _LARGEFILE64_SOURCE - -#include <string.h> -#include <stdio.h> -#include <stdint.h> -#include <stdbool.h> -#include <signal.h> -#include <assert.h> -#include <stdlib.h> -#include <ucontext.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <unistd.h> - -#include "mpx-hw.h" -#include "mpx-debug.h" -#include "mpx-mm.h" - -#ifndef __always_inline -#define __always_inline inline __attribute__((always_inline) -#endif - -#ifndef TEST_DURATION_SECS -#define TEST_DURATION_SECS 3 -#endif - -void write_int_to(char *prefix, char *file, int int_to_write) -{ - char buf[100]; - int fd = open(file, O_RDWR); - int len; - int ret; - - assert(fd >= 0); - len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write); - assert(len >= 0); - assert(len < sizeof(buf)); - ret = write(fd, buf, len); - assert(ret == len); - ret = close(fd); - assert(!ret); -} - -void write_pid_to(char *prefix, char *file) -{ - write_int_to(prefix, file, getpid()); -} - -void trace_me(void) -{ -/* tracing events dir */ -#define TED "/sys/kernel/debug/tracing/events/" -/* - write_pid_to("common_pid=", TED "signal/filter"); - write_pid_to("common_pid=", TED "exceptions/filter"); - write_int_to("", TED "signal/enable", 1); - write_int_to("", TED "exceptions/enable", 1); -*/ - write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid"); - write_int_to("", "/sys/kernel/debug/tracing/trace", 0); -} - -#define test_failed() __test_failed(__FILE__, __LINE__) -static void __test_failed(char *f, int l) -{ - fprintf(stderr, "abort @ %s::%d\n", f, l); - abort(); -} - -/* Error Printf */ -#define eprintf(args...) 
fprintf(stderr, args) - -#ifdef __i386__ - -/* i386 directory size is 4MB */ -#define REG_IP_IDX REG_EIP -#define REX_PREFIX - -#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate) - -/* - * __cpuid() is from the Linux Kernel: - */ -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "push %%ebx;" - "cpuid;" - "mov %%ebx, %1;" - "pop %%ebx" - : "=a" (*eax), - "=g" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -#else /* __i386__ */ - -#define REG_IP_IDX REG_RIP -#define REX_PREFIX "0x48, " - -#define XSAVE_OFFSET_IN_FPMEM 0 - -/* - * __cpuid() is from the Linux Kernel: - */ -static inline void __cpuid(unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile( - "cpuid;" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} - -#endif /* !__i386__ */ - -struct xsave_hdr_struct { - uint64_t xstate_bv; - uint64_t reserved1[2]; - uint64_t reserved2[5]; -} __attribute__((packed)); - -struct bndregs_struct { - uint64_t bndregs[8]; -} __attribute__((packed)); - -struct bndcsr_struct { - uint64_t cfg_reg_u; - uint64_t status_reg; -} __attribute__((packed)); - -struct xsave_struct { - uint8_t fpu_sse[512]; - struct xsave_hdr_struct xsave_hdr; - uint8_t ymm[256]; - uint8_t lwp[128]; - struct bndregs_struct bndregs; - struct bndcsr_struct bndcsr; -} __attribute__((packed)); - -uint8_t __attribute__((__aligned__(64))) buffer[4096]; -struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer; - -uint8_t __attribute__((__aligned__(64))) test_buffer[4096]; -struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer; - -uint64_t num_bnd_chk; - -static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask) -{ - uint32_t lmask = mask; - uint32_t hmask = mask >> 32; - 
- asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static __always_inline void xsave_state_1(void *_fx, uint64_t mask) -{ - uint32_t lmask = mask; - uint32_t hmask = mask >> 32; - unsigned char *fx = _fx; - - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static inline uint64_t xgetbv(uint32_t index) -{ - uint32_t eax, edx; - - asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((uint64_t)edx << 32); -} - -static uint64_t read_mpx_status_sig(ucontext_t *uctxt) -{ - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, - (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM, - sizeof(struct xsave_struct)); - - return xsave_buf->bndcsr.status_reg; -} - -#include <pthread.h> - -static uint8_t *get_next_inst_ip(uint8_t *addr) -{ - uint8_t *ip = addr; - uint8_t sib; - uint8_t rm; - uint8_t mod; - uint8_t base; - uint8_t modrm; - - /* determine the prefix. */ - switch(*ip) { - case 0xf2: - case 0xf3: - case 0x66: - ip++; - break; - } - - /* look for rex prefix */ - if ((*ip & 0x40) == 0x40) - ip++; - - /* Make sure we have a MPX instruction. */ - if (*ip++ != 0x0f) - return addr; - - /* Skip the op code byte. */ - ip++; - - /* Get the modrm byte. */ - modrm = *ip++; - - /* Break it down into parts. */ - rm = modrm & 7; - mod = (modrm >> 6); - - /* Init the parts of the address mode. */ - base = 8; - - /* Is it a mem mode? 
*/ - if (mod != 3) { - /* look for scaled indexed addressing */ - if (rm == 4) { - /* SIB addressing */ - sib = *ip++; - base = sib & 7; - switch (mod) { - case 0: - if (base == 5) - ip += 4; - break; - - case 1: - ip++; - break; - - case 2: - ip += 4; - break; - } - - } else { - /* MODRM addressing */ - switch (mod) { - case 0: - /* DISP32 addressing, no base */ - if (rm == 5) - ip += 4; - break; - - case 1: - ip++; - break; - - case 2: - ip += 4; - break; - } - } - } - return ip; -} - -#ifdef si_lower -static inline void *__si_bounds_lower(siginfo_t *si) -{ - return si->si_lower; -} - -static inline void *__si_bounds_upper(siginfo_t *si) -{ - return si->si_upper; -} -#else - -/* - * This deals with old version of _sigfault in some distros: - * - -old _sigfault: - struct { - void *si_addr; - } _sigfault; - -new _sigfault: - struct { - void __user *_addr; - int _trapno; - short _addr_lsb; - union { - struct { - void __user *_lower; - void __user *_upper; - } _addr_bnd; - __u32 _pkey; - }; - } _sigfault; - * - */ - -static inline void **__si_bounds_hack(siginfo_t *si) -{ - void *sigfault = &si->_sifields._sigfault; - void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); - int *trapno = (int*)end_sigfault; - /* skip _trapno and _addr_lsb */ - void **__si_lower = (void**)(trapno + 2); - - return __si_lower; -} - -static inline void *__si_bounds_lower(siginfo_t *si) -{ - return *__si_bounds_hack(si); -} - -static inline void *__si_bounds_upper(siginfo_t *si) -{ - return *(__si_bounds_hack(si) + 1); -} -#endif - -static int br_count; -static int expected_bnd_index = -1; -uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ -unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; - -/* Failed address bound checks: */ -#ifndef SEGV_BNDERR -# define SEGV_BNDERR 3 -#endif - -/* - * The kernel is supposed to provide some information about the bounds - * exception in the siginfo. 
It should match what we have in the bounds - * registers that we are checking against. Just check against the shadow copy - * since it is easily available, and we also check that *it* matches the real - * registers. - */ -void check_siginfo_vs_shadow(siginfo_t* si) -{ - int siginfo_ok = 1; - void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0]; - void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1]; - - if ((expected_bnd_index < 0) || - (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) { - fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n", - expected_bnd_index); - exit(6); - } - if (__si_bounds_lower(si) != shadow_lower) - siginfo_ok = 0; - if (__si_bounds_upper(si) != shadow_upper) - siginfo_ok = 0; - - if (!siginfo_ok) { - fprintf(stderr, "ERROR: siginfo bounds do not match " - "shadow bounds for register %d\n", expected_bnd_index); - exit(7); - } -} - -void handler(int signum, siginfo_t *si, void *vucontext) -{ - int i; - ucontext_t *uctxt = vucontext; - int trapno; - unsigned long ip; - - dprintf1("entered signal handler\n"); - - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; - - if (trapno == 5) { - typeof(si->si_addr) *si_addr_ptr = &si->si_addr; - uint64_t status = read_mpx_status_sig(uctxt); - uint64_t br_reason = status & 0x3; - - br_count++; - dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); - - dprintf2("Saw a #BR! 
status 0x%jx at %016lx br_reason: %jx\n", - status, ip, br_reason); - dprintf2("si_signo: %d\n", si->si_signo); - dprintf2(" signum: %d\n", signum); - dprintf2("info->si_code == SEGV_BNDERR: %d\n", - (si->si_code == SEGV_BNDERR)); - dprintf2("info->si_code: %d\n", si->si_code); - dprintf2("info->si_lower: %p\n", __si_bounds_lower(si)); - dprintf2("info->si_upper: %p\n", __si_bounds_upper(si)); - - for (i = 0; i < 8; i++) - dprintf3("[%d]: %p\n", i, si_addr_ptr[i]); - switch (br_reason) { - case 0: /* traditional BR */ - fprintf(stderr, - "Undefined status with bound exception:%jx\n", - status); - exit(5); - case 1: /* #BR MPX bounds exception */ - /* these are normal and we expect to see them */ - - check_siginfo_vs_shadow(si); - - dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n", - status, (void *)ip, si->si_addr); - num_bnd_chk++; - uctxt->uc_mcontext.gregs[REG_IP_IDX] = - (greg_t)get_next_inst_ip((uint8_t *)ip); - break; - case 2: - fprintf(stderr, "#BR status == 2, missing bounds table," - "kernel should have handled!!\n"); - exit(4); - break; - default: - fprintf(stderr, "bound check error: status 0x%jx at %p\n", - status, (void *)ip); - num_bnd_chk++; - uctxt->uc_mcontext.gregs[REG_IP_IDX] = - (greg_t)get_next_inst_ip((uint8_t *)ip); - fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr); - exit(3); - } - } else if (trapno == 14) { - eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - eprintf("si_addr %p\n", si->si_addr); - eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - test_failed(); - } else { - eprintf("unexpected trap %d! 
at 0x%lx\n", trapno, ip); - eprintf("si_addr %p\n", si->si_addr); - eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - test_failed(); - } -} - -static inline void cpuid_count(unsigned int op, int count, - unsigned int *eax, unsigned int *ebx, - unsigned int *ecx, unsigned int *edx) -{ - *eax = op; - *ecx = count; - __cpuid(eax, ebx, ecx, edx); -} - -#define XSTATE_CPUID 0x0000000d - -/* - * List of XSAVE features Linux knows about: - */ -enum xfeature_bit { - XSTATE_BIT_FP, - XSTATE_BIT_SSE, - XSTATE_BIT_YMM, - XSTATE_BIT_BNDREGS, - XSTATE_BIT_BNDCSR, - XSTATE_BIT_OPMASK, - XSTATE_BIT_ZMM_Hi256, - XSTATE_BIT_Hi16_ZMM, - - XFEATURES_NR_MAX, -}; - -#define XSTATE_FP (1 << XSTATE_BIT_FP) -#define XSTATE_SSE (1 << XSTATE_BIT_SSE) -#define XSTATE_YMM (1 << XSTATE_BIT_YMM) -#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) -#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) -#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) -#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) -#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) - -#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */ - -bool one_bit(unsigned int x, int bit) -{ - return !!(x & (1<<bit)); -} - -void print_state_component(int state_bit_nr, char *name) -{ - unsigned int eax, ebx, ecx, edx; - unsigned int state_component_size; - unsigned int state_component_supervisor; - unsigned int state_component_user; - unsigned int state_component_aligned; - - /* See SDM Section 13.2 */ - cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx); - assert(eax || ebx || ecx); - state_component_size = eax; - state_component_supervisor = ((!ebx) && one_bit(ecx, 0)); - state_component_user = !one_bit(ecx, 0); - state_component_aligned = one_bit(ecx, 1); - printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n", - name, - state_component_size, state_component_user, - state_component_supervisor, state_component_aligned); - -} - -/* Intel-defined CPU features, CPUID level 0x00000001 
(ecx) */ -#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ -#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */ - -bool check_mpx_support(void) -{ - unsigned int eax, ebx, ecx, edx; - - cpuid_count(1, 0, &eax, &ebx, &ecx, &edx); - - /* We can't do much without XSAVE, so just make these assert()'s */ - if (!one_bit(ecx, XSAVE_FEATURE_BIT)) { - fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n"); - exit(0); - } - - if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) { - fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n"); - exit(0); - } - - /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */ - /* Is this redundant with the feature bit checks? */ - cpuid_count(0, 0, &eax, &ebx, &ecx, &edx); - if (eax < XSTATE_CPUID) { - fprintf(stderr, "processor lacks XSTATE CPUID leaf," - " can not run MPX tests\n"); - exit(0); - } - - printf("XSAVE is supported by HW & OS\n"); - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - - printf("XSAVE processor supported state mask: 0x%x\n", eax); - printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0)); - - /* Make sure that the MPX states are enabled in in XCR0 */ - if ((eax & MPX_XSTATES) != MPX_XSTATES) { - fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n"); - exit(0); - } - - /* Make sure the MPX states are supported by XSAVE* */ - if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) { - fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, " - "can not run MPX tests\n"); - exit(0); - } - - print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS"); - print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR"); - - return true; -} - -void enable_mpx(void *l1base) -{ - /* enable point lookup */ - memset(buffer, 0, sizeof(buffer)); - xrstor_state(xsave_buf, 0x18); - - xsave_buf->xsave_hdr.xstate_bv = 0x10; - xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1; - xsave_buf->bndcsr.status_reg = 0; - - dprintf2("bf xrstor\n"); - dprintf2("xsave cndcsr: status %jx, 
configu %jx\n", - xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); - xrstor_state(xsave_buf, 0x18); - dprintf2("after xrstor\n"); - - xsave_state_1(xsave_buf, 0x18); - - dprintf1("xsave bndcsr: status %jx, configu %jx\n", - xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); -} - -#include <sys/prctl.h> - -struct mpx_bounds_dir *bounds_dir_ptr; - -unsigned long __bd_incore(const char *func, int line) -{ - unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES); - return ret; -} -#define bd_incore() __bd_incore(__func__, __LINE__) - -void check_clear(void *ptr, unsigned long sz) -{ - unsigned long *i; - - for (i = ptr; (void *)i < ptr + sz; i++) { - if (*i) { - dprintf1("%p is NOT clear at %p\n", ptr, i); - assert(0); - } - } - dprintf1("%p is clear for %lx\n", ptr, sz); -} - -void check_clear_bd(void) -{ - check_clear(bounds_dir_ptr, 2UL << 30); -} - -#define USE_MALLOC_FOR_BOUNDS_DIR 1 -bool process_specific_init(void) -{ - unsigned long size; - unsigned long *dir; - /* Guarantee we have the space to align it, add padding: */ - unsigned long pad = getpagesize(); - - size = 2UL << 30; /* 2GB */ - if (sizeof(unsigned long) == 4) - size = 4UL << 20; /* 4MB */ - dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20)); - - if (USE_MALLOC_FOR_BOUNDS_DIR) { - unsigned long _dir; - - dir = malloc(size + pad); - assert(dir); - _dir = (unsigned long)dir; - _dir += 0xfffUL; - _dir &= ~0xfffUL; - dir = (void *)_dir; - } else { - /* - * This makes debugging easier because the address - * calculations are simpler: - */ - dir = mmap((void *)0x200000000000, size + pad, - PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (dir == (void *)-1) { - perror("unable to allocate bounds directory"); - abort(); - } - check_clear(dir, size); - } - bounds_dir_ptr = (void *)dir; - madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE); - bd_incore(); - dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr, - (char *)bounds_dir_ptr 
+ size); - check_clear(dir, size); - enable_mpx(dir); - check_clear(dir, size); - if (prctl(43, 0, 0, 0, 0)) { - printf("no MPX support\n"); - abort(); - return false; - } - return true; -} - -bool process_specific_finish(void) -{ - if (prctl(44)) { - printf("no MPX support\n"); - return false; - } - return true; -} - -void setup_handler() -{ - int r, rs; - struct sigaction newact; - struct sigaction oldact; - - /* #BR is mapped to sigsegv */ - int signum = SIGSEGV; - - newact.sa_handler = 0; /* void(*)(int)*/ - newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */ - - /*sigset_t - signals to block while in the handler */ - /* get the old signal mask. */ - rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); - assert(rs == 0); - - /* call sa_sigaction, not sa_handler*/ - newact.sa_flags = SA_SIGINFO; - - newact.sa_restorer = 0; /* void(*)(), obsolete */ - r = sigaction(signum, &newact, &oldact); - assert(r == 0); -} - -void mpx_prepare(void) -{ - dprintf2("%s()\n", __func__); - setup_handler(); - process_specific_init(); -} - -void mpx_cleanup(void) -{ - printf("%s(): %jd BRs. 
bye...\n", __func__, num_bnd_chk); - process_specific_finish(); -} - -/*-------------- the following is test case ---------------*/ -#include <stdint.h> -#include <stdbool.h> -#include <stdlib.h> -#include <stdio.h> -#include <time.h> - -uint64_t num_lower_brs; -uint64_t num_upper_brs; - -#define MPX_CONFIG_OFFSET 1024 -#define MPX_BOUNDS_OFFSET 960 -#define MPX_HEADER_OFFSET 512 -#define MAX_ADDR_TESTED (1<<28) -#define TEST_ROUNDS 100 - -/* - 0F 1A /r BNDLDX-Load - 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation - 66 0F 1A /r BNDMOV bnd1, bnd2/m128 - 66 0F 1B /r BNDMOV bnd1/m128, bnd2 - F2 0F 1A /r BNDCU bnd, r/m64 - F2 0F 1B /r BNDCN bnd, r/m64 - F3 0F 1A /r BNDCL bnd, r/m64 - F3 0F 1B /r BNDMK bnd, m64 -*/ - -static __always_inline void xsave_state(void *_fx, uint64_t mask) -{ - uint32_t lmask = mask; - uint32_t hmask = mask >> 32; - unsigned char *fx = _fx; - - asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" - : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); -} - -static __always_inline void mpx_clear_bnd0(void) -{ - long size = 0; - void *ptr = NULL; - /* F3 0F 1B /r BNDMK bnd, m64 */ - /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ - asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" - : : "c" (ptr), "d" (size-1) - : "memory"); -} - -static __always_inline void mpx_make_bound_helper(unsigned long ptr, - unsigned long size) -{ - /* F3 0F 1B /r BNDMK bnd, m64 */ - /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ - asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" - : : "c" (ptr), "d" (size-1) - : "memory"); -} - -static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr) -{ - /* F3 0F 1A /r NDCL bnd, r/m64 */ - /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */ - asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t" - : : "c" (ptr) - : "memory"); -} - -static __always_inline void mpx_check_upperbound_helper(unsigned long ptr) -{ - /* F2 0F 1A /r BNDCU bnd, r/m64 */ - /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */ - asm volatile(".byte 
0xf2,0x0f,0x1a,0x01\n\t" - : : "c" (ptr) - : "memory"); -} - -static __always_inline void mpx_movbndreg_helper() -{ - /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ - /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */ - - asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t"); -} - -static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem) -{ - /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ - /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */ - asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t" - : : "c" (mem) - : "memory"); -} - -static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem) -{ - /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */ - /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */ - asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t" - : : "c" (mem) - : "memory"); -} - -static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr, - unsigned long ptr_val) -{ - /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */ - /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */ - asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t" - : : "c" (ptr_addr), "d" (ptr_val) - : "memory"); -} - -static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr, - unsigned long ptr_val) -{ - /* 0F 1A /r BNDLDX-Load */ - /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */ - asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t" - : : "c" (ptr_addr), "d" (ptr_val) - : "memory"); -} - -void __print_context(void *__print_xsave_buffer, int line) -{ - uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET); - uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET); - - int i; - eprintf("%s()::%d\n", "print_context", line); - for (i = 0; i < 4; i++) { - eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i, - (unsigned long)bounds[i*2], - ~(unsigned long)bounds[i*2+1], - (unsigned long)bounds[i*2+1]); - } - - eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]); -} -#define print_context(x) __print_context(x, __LINE__) -#ifdef DEBUG -#define dprint_context(x) print_context(x) -#else -#define dprint_context(x) 
do{}while(0) -#endif - -void init() -{ - int i; - - srand((unsigned int)time(NULL)); - - for (i = 0; i < 4; i++) { - shadow_plb[i][0] = 0; - shadow_plb[i][1] = ~(unsigned long)0; - } -} - -long int __mpx_random(int line) -{ -#ifdef NOT_SO_RANDOM - static long fake = 722122311; - fake += 563792075; - return fakse; -#else - return random(); -#endif -} -#define mpx_random() __mpx_random(__LINE__) - -uint8_t *get_random_addr() -{ - uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED); - return (addr - (unsigned long)addr % sizeof(uint8_t *)); -} - -static inline bool compare_context(void *__xsave_buffer) -{ - uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET); - - int i; - for (i = 0; i < 4; i++) { - dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n", - i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1], - i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]); - if ((shadow_plb[i][0] != bounds[i*2]) || - (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) { - eprintf("ERROR comparing shadow to real bound register %d\n", i); - eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n", - (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1], - (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]); - return false; - } - } - - return true; -} - -void mkbnd_shadow(uint8_t *ptr, int index, long offset) -{ - uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]); - uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]); - *lower = (unsigned long)ptr; - *upper = (unsigned long)ptr + offset - 1; -} - -void check_lowerbound_shadow(uint8_t *ptr, int index) -{ - uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]); - if (*lower > (uint64_t)(unsigned long)ptr) - num_lower_brs++; - else - dprintf1("LowerBoundChk passed:%p\n", ptr); -} - -void check_upperbound_shadow(uint8_t *ptr, int index) -{ - uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]); - if (upper < (uint64_t)(unsigned 
long)ptr) - num_upper_brs++; - else - dprintf1("UpperBoundChk passed:%p\n", ptr); -} - -__always_inline void movbndreg_shadow(int src, int dest) -{ - shadow_plb[dest][0] = shadow_plb[src][0]; - shadow_plb[dest][1] = shadow_plb[src][1]; -} - -__always_inline void movbnd2mem_shadow(int src, unsigned long *dest) -{ - unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]); - unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]); - *dest = *lower; - *(dest+1) = *upper; -} - -__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest) -{ - unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]); - unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]); - *lower = *src; - *upper = *(src+1); -} - -__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) -{ - shadow_map[0] = (unsigned long)shadow_plb[index][0]; - shadow_map[1] = (unsigned long)shadow_plb[index][1]; - shadow_map[2] = (unsigned long)ptr_val; - dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__, - index, ptr, ptr_val, ptr_val); - /*ptr ignored */ -} - -void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) -{ - uint64_t lower = shadow_map[0]; - uint64_t upper = shadow_map[1]; - uint8_t *value = (uint8_t *)shadow_map[2]; - - if (value != ptr_val) { - dprintf2("%s(%d, %p, %p) init shadow bounds[%d] " - "because %p != %p\n", __func__, index, ptr, - ptr_val, index, value, ptr_val); - shadow_plb[index][0] = 0; - shadow_plb[index][1] = ~(unsigned long)0; - } else { - shadow_plb[index][0] = lower; - shadow_plb[index][1] = upper; - } - /* ptr ignored */ -} - -static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr) -{ - mpx_make_bound_helper((unsigned long)ptr, 0x1800); -} - -static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr) -{ - mkbnd_shadow(ptr, 0, 0x1800); -} - -static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr) -{ - /* these are hard-coded to check bnd0 */ - 
expected_bnd_index = 0; - mpx_check_lowerbound_helper((unsigned long)(ptr-1)); - mpx_check_upperbound_helper((unsigned long)(ptr+0x1800)); - /* reset this since we do not expect any more bounds exceptions */ - expected_bnd_index = -1; -} - -static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr) -{ - check_lowerbound_shadow(ptr-1, 0); - check_upperbound_shadow(ptr+0x1800, 0); -} - -static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr) -{ - mpx_make_bound_helper((unsigned long)ptr, 0x1800); - mpx_movbndreg_helper(); - mpx_movbnd2mem_helper(buf); - mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); -} - -static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr) -{ - mkbnd_shadow(ptr, 0, 0x1800); - movbndreg_shadow(0, 2); - movbnd2mem_shadow(0, (unsigned long *)buf); - mkbnd_shadow(ptr+0x12, 0, 0x1800); -} - -static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr) -{ - mpx_movbnd_from_mem_helper(buf); -} - -static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr) -{ - movbnd_from_mem_shadow((unsigned long *)buf, 0); -} - -static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr) -{ - mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr); - mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); -} - -static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr) -{ - stdsc_shadow(0, buf, ptr); - mkbnd_shadow(ptr+0x12, 0, 0x1800); -} - -static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr) -{ - mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr); -} - -static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr) -{ - lddsc_shadow(0, buf, ptr); -} - -#define NR_MPX_TEST_FUNCTIONS 6 - -/* - * For compatibility reasons, MPX will clear the bounds registers - * when you make function calls (among other things). 
We have to - * preserve the registers in between calls to the "helpers" since - * they build on each other. - * - * Be very careful not to make any function calls inside the - * helpers, or anywhere else beween the xrstor and xsave. - */ -#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \ - xrstor_state(xsave_test_buf, flags); \ - mpx_test_helper##helper_nr(buf, ptr); \ - xsave_state(xsave_test_buf, flags); \ - mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \ -} while (0) - -static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr) -{ - uint64_t flags = 0x18; - - dprint_context(xsave_test_buf); - switch (nr) { - case 0: - run_helper(0, buf, buf_shadow, ptr); - break; - case 1: - run_helper(1, buf, buf_shadow, ptr); - break; - case 2: - run_helper(2, buf, buf_shadow, ptr); - break; - case 3: - run_helper(3, buf, buf_shadow, ptr); - break; - case 4: - run_helper(4, buf, buf_shadow, ptr); - break; - case 5: - run_helper(5, buf, buf_shadow, ptr); - break; - default: - test_failed(); - break; - } - dprint_context(xsave_test_buf); -} - -unsigned long buf_shadow[1024]; /* used to check load / store descriptors */ -extern long inspect_me(struct mpx_bounds_dir *bounds_dir); - -long cover_buf_with_bt_entries(void *buf, long buf_len) -{ - int i; - long nr_to_fill; - int ratio = 1000; - unsigned long buf_len_in_ptrs; - - /* Fill about 1/100 of the space with bt entries */ - nr_to_fill = buf_len / (sizeof(unsigned long) * ratio); - - if (!nr_to_fill) - dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill); - - /* Align the buffer to pointer size */ - while (((unsigned long)buf) % sizeof(void *)) { - buf++; - buf_len--; - } - /* We are storing pointers, so make */ - buf_len_in_ptrs = buf_len / sizeof(void *); - - for (i = 0; i < nr_to_fill; i++) { - long index = (mpx_random() % buf_len_in_ptrs); - void *ptr = buf + index * sizeof(unsigned long); - unsigned long ptr_addr = (unsigned long)ptr; - - /* ptr and size can be anything */ - 
mpx_make_bound_helper((unsigned long)ptr, 8); - - /* - * take bnd0 and put it in to bounds tables "buf + index" is an - * address inside the buffer where we are pretending that we - * are going to put a pointer We do not, though because we will - * never load entries from the table, so it doesn't matter. - */ - mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr); - dprintf4("storing bound table entry for %lx (buf start @ %p)\n", - ptr_addr, buf); - } - return nr_to_fill; -} - -unsigned long align_down(unsigned long alignme, unsigned long align_to) -{ - return alignme & ~(align_to-1); -} - -unsigned long align_up(unsigned long alignme, unsigned long align_to) -{ - return (alignme + align_to - 1) & ~(align_to-1); -} - -/* - * Using 1MB alignment guarantees that each no allocation - * will overlap with another's bounds tables. - * - * We have to cook our own allocator here. malloc() can - * mix other allocation with ours which means that even - * if we free all of our allocations, there might still - * be bounds tables for the *areas* since there is other - * valid memory there. - * - * We also can't use malloc() because a free() of an area - * might not free it back to the kernel. We want it - * completely unmapped an malloc() does not guarantee - * that. - */ -#ifdef __i386__ -long alignment = 4096; -long sz_alignment = 4096; -#else -long alignment = 1 * MB; -long sz_alignment = 1 * MB; -#endif -void *mpx_mini_alloc(unsigned long sz) -{ - unsigned long long tries = 0; - static void *last; - void *ptr; - void *try_at; - - sz = align_up(sz, sz_alignment); - - try_at = last + alignment; - while (1) { - ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (ptr == (void *)-1) - return NULL; - if (ptr == try_at) - break; - - munmap(ptr, sz); - try_at += alignment; -#ifdef __i386__ - /* - * This isn't quite correct for 32-bit binaries - * on 64-bit kernels since they can use the - * entire 32-bit address space, but it's close - * enough. 
- */ - if (try_at > (void *)0xC0000000) -#else - if (try_at > (void *)0x0000800000000000) -#endif - try_at = (void *)0x0; - if (!(++tries % 10000)) - dprintf1("stuck in %s(), tries: %lld\n", __func__, tries); - continue; - } - last = ptr; - dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr); - return ptr; -} -void mpx_mini_free(void *ptr, long sz) -{ - dprintf2("%s() ptr: %p\n", __func__, ptr); - if ((unsigned long)ptr > 0x100000000000) { - dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr); - test_failed(); - } - sz = align_up(sz, sz_alignment); - dprintf3("%s() ptr: %p before munmap\n", __func__, ptr); - munmap(ptr, sz); - dprintf3("%s() ptr: %p DONE\n", __func__, ptr); -} - -#define NR_MALLOCS 100 -struct one_malloc { - char *ptr; - int nr_filled_btes; - unsigned long size; -}; -struct one_malloc mallocs[NR_MALLOCS]; - -void free_one_malloc(int index) -{ - unsigned long free_ptr; - unsigned long mask; - - if (!mallocs[index].ptr) - return; - - mpx_mini_free(mallocs[index].ptr, mallocs[index].size); - dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr); - - free_ptr = (unsigned long)mallocs[index].ptr; - mask = alignment-1; - dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr, - (free_ptr & mask), mask); - assert((free_ptr & mask) == 0); - - mallocs[index].ptr = NULL; -} - -#ifdef __i386__ -#define MPX_BOUNDS_TABLE_COVERS 4096 -#else -#define MPX_BOUNDS_TABLE_COVERS (1 * MB) -#endif -void zap_everything(void) -{ - long after_zap; - long before_zap; - int i; - - before_zap = inspect_me(bounds_dir_ptr); - dprintf1("zapping everything start: %ld\n", before_zap); - for (i = 0; i < NR_MALLOCS; i++) - free_one_malloc(i); - - after_zap = inspect_me(bounds_dir_ptr); - dprintf1("zapping everything done: %ld\n", after_zap); - /* - * We only guarantee to empty the thing out if our allocations are - * exactly aligned on the boundaries of a boudns table. 
- */ - if ((alignment >= MPX_BOUNDS_TABLE_COVERS) && - (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) { - if (after_zap != 0) - test_failed(); - - assert(after_zap == 0); - } -} - -void do_one_malloc(void) -{ - static int malloc_counter; - long sz; - int rand_index = (mpx_random() % NR_MALLOCS); - void *ptr = mallocs[rand_index].ptr; - - dprintf3("%s() enter\n", __func__); - - if (ptr) { - dprintf3("freeing one malloc at index: %d\n", rand_index); - free_one_malloc(rand_index); - if (mpx_random() % (NR_MALLOCS*3) == 3) { - int i; - dprintf3("zapping some more\n"); - for (i = rand_index; i < NR_MALLOCS; i++) - free_one_malloc(i); - } - if ((mpx_random() % zap_all_every_this_many_mallocs) == 4) - zap_everything(); - } - - /* 1->~1M */ - sz = (1 + mpx_random() % 1000) * 1000; - ptr = mpx_mini_alloc(sz); - if (!ptr) { - /* - * If we are failing allocations, just assume we - * are out of memory and zap everything. - */ - dprintf3("zapping everything because out of memory\n"); - zap_everything(); - goto out; - } - - dprintf3("malloc: %p size: 0x%lx\n", ptr, sz); - mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz); - mallocs[rand_index].ptr = ptr; - mallocs[rand_index].size = sz; -out: - if ((++malloc_counter) % inspect_every_this_many_mallocs == 0) - inspect_me(bounds_dir_ptr); -} - -void run_timed_test(void (*test_func)(void)) -{ - int done = 0; - long iteration = 0; - static time_t last_print; - time_t now; - time_t start; - - time(&start); - while (!done) { - time(&now); - if ((now - start) > TEST_DURATION_SECS) - done = 1; - - test_func(); - iteration++; - - if ((now - last_print > 1) || done) { - printf("iteration %ld complete, OK so far\n", iteration); - last_print = now; - } - } -} - -void check_bounds_table_frees(void) -{ - printf("executing unmaptest\n"); - inspect_me(bounds_dir_ptr); - run_timed_test(&do_one_malloc); - printf("done with malloc() fun\n"); -} - -void insn_test_failed(int test_nr, int test_round, void *buf, - void *buf_shadow, 
void *ptr) -{ - print_context(xsave_test_buf); - eprintf("ERROR: test %d round %d failed\n", test_nr, test_round); - while (test_nr == 5) { - struct mpx_bt_entry *bte; - struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr; - struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd); - - printf(" bd: %p\n", bd); - printf("&bde: %p\n", bde); - printf("*bde: %lx\n", *(unsigned long *)bde); - if (!bd_entry_valid(bde)) - break; - - bte = mpx_vaddr_to_bt_entry(buf, bd); - printf(" te: %p\n", bte); - printf("bte[0]: %lx\n", bte->contents[0]); - printf("bte[1]: %lx\n", bte->contents[1]); - printf("bte[2]: %lx\n", bte->contents[2]); - printf("bte[3]: %lx\n", bte->contents[3]); - break; - } - test_failed(); -} - -void check_mpx_insns_and_tables(void) -{ - int successes = 0; - int failures = 0; - int buf_size = (1024*1024); - unsigned long *buf = malloc(buf_size); - const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS; - int i, j; - - memset(buf, 0, buf_size); - memset(buf_shadow, 0, sizeof(buf_shadow)); - - for (i = 0; i < TEST_ROUNDS; i++) { - uint8_t *ptr = get_random_addr() + 8; - - for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) { - if (0 && j != 5) { - successes++; - continue; - } - dprintf2("starting test %d round %d\n", j, i); - dprint_context(xsave_test_buf); - /* - * test5 loads an address from the bounds tables. - * The load will only complete if 'ptr' matches - * the load and the store, so with random addrs, - * the odds of this are very small. Make it - * higher by only moving 'ptr' 1/10 times. 
- */ - if (random() % 10 <= 0) - ptr = get_random_addr() + 8; - dprintf3("random ptr{%p}\n", ptr); - dprint_context(xsave_test_buf); - run_helpers(j, (void *)buf, (void *)buf_shadow, ptr); - dprint_context(xsave_test_buf); - if (!compare_context(xsave_test_buf)) { - insn_test_failed(j, i, buf, buf_shadow, ptr); - failures++; - goto exit; - } - successes++; - dprint_context(xsave_test_buf); - dprintf2("finished test %d round %d\n", j, i); - dprintf3("\n"); - dprint_context(xsave_test_buf); - } - } - -exit: - dprintf2("\nabout to free:\n"); - free(buf); - dprintf1("successes: %d\n", successes); - dprintf1(" failures: %d\n", failures); - dprintf1(" tests: %d\n", total_nr_tests); - dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); - dprintf1(" saw: %d #BRs\n", br_count); - if (failures) { - eprintf("ERROR: non-zero number of failures\n"); - exit(20); - } - if (successes != total_nr_tests) { - eprintf("ERROR: succeeded fewer than number of tries (%d != %d)\n", - successes, total_nr_tests); - exit(21); - } - if (num_upper_brs + num_lower_brs != br_count) { - eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n", - num_upper_brs, num_lower_brs, br_count); - eprintf("successes: %d\n", successes); - eprintf(" failures: %d\n", failures); - eprintf(" tests: %d\n", total_nr_tests); - eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); - eprintf(" saw: %d #BRs\n", br_count); - exit(22); - } -} - -/* - * This is supposed to SIGSEGV nicely once the kernel - * can no longer allocate vaddr space. 
- */ -void exhaust_vaddr_space(void) -{ - unsigned long ptr; - /* Try to make sure there is no room for a bounds table anywhere */ - unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE; -#ifdef __i386__ - unsigned long max_vaddr = 0xf7788000UL; -#else - unsigned long max_vaddr = 0x800000000000UL; -#endif - - dprintf1("%s() start\n", __func__); - /* do not start at 0, we aren't allowed to map there */ - for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { - void *ptr_ret; - int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL); - - if (!ret) { - dprintf1("madvise() %lx ret: %d\n", ptr, ret); - continue; - } - ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE, - MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); - if (ptr_ret != (void *)ptr) { - perror("mmap"); - dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); - break; - } - if (!(ptr & 0xffffff)) - dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); - } - for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { - dprintf2("covering 0x%lx with bounds table entries\n", ptr); - cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE); - } - dprintf1("%s() end\n", __func__); - printf("done with vaddr space fun\n"); -} - -void mpx_table_test(void) -{ - printf("starting mpx bounds table test\n"); - run_timed_test(check_mpx_insns_and_tables); - printf("done with mpx bounds table test\n"); -} - -int main(int argc, char **argv) -{ - int unmaptest = 0; - int vaddrexhaust = 0; - int tabletest = 0; - int i; - - check_mpx_support(); - mpx_prepare(); - srandom(11179); - - bd_incore(); - init(); - bd_incore(); - - trace_me(); - - xsave_state((void *)xsave_test_buf, 0x1f); - if (!compare_context(xsave_test_buf)) - printf("Init failed\n"); - - for (i = 1; i < argc; i++) { - if (!strcmp(argv[i], "unmaptest")) - unmaptest = 1; - if (!strcmp(argv[i], "vaddrexhaust")) - vaddrexhaust = 1; - if (!strcmp(argv[i], "tabletest")) - tabletest = 1; - } - if (!(unmaptest || vaddrexhaust || tabletest)) { - unmaptest = 1; - /* vaddrexhaust = 1; */ - 
tabletest = 1; - } - if (unmaptest) - check_bounds_table_frees(); - if (tabletest) - mpx_table_test(); - if (vaddrexhaust) - exhaust_vaddr_space(); - printf("%s completed successfully\n", argv[0]); - exit(0); -} - -#include "mpx-dig.c" diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h deleted file mode 100644 index 6dbdd66b8242..000000000000 --- a/tools/testing/selftests/x86/mpx-mm.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _MPX_MM_H -#define _MPX_MM_H - -#define PAGE_SIZE 4096 -#define MB (1UL<<20) - -extern long nr_incore(void *ptr, unsigned long size_bytes); - -#endif /* _MPX_MM_H */ diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 3e49a7873f3e..57c4f67f16ef 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -451,6 +451,19 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void) ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; ctx->uc_mcontext.gregs[REG_CX] = 0; +#ifdef __i386__ + /* + * Make sure the kernel doesn't inadvertently use DS or ES-relative + * accesses in a region where user DS or ES is loaded. + * + * Skip this for 64-bit builds because long mode doesn't care about + * DS and ES and skipping it increases test coverage a little bit, + * since 64-bit kernels can still run the 32-bit build. + */ + ctx->uc_mcontext.gregs[REG_DS] = 0; + ctx->uc_mcontext.gregs[REG_ES] = 0; +#endif + memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. 
*/ diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 50ce6c3dd904..1063328e275c 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -43,7 +43,19 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), err(1, "sigaction"); } -static volatile sig_atomic_t sig_traps; +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps, sig_eflags; +sigjmp_buf jmpbuf; +static unsigned char altstack_data[SIGSTKSZ]; #ifdef __x86_64__ # define REG_IP REG_RIP @@ -90,6 +102,25 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void) } } +static char const * const signames[] = { + [SIGSEGV] = "SIGSEGV", + [SIGBUS] = "SIBGUS", + [SIGTRAP] = "SIGTRAP", + [SIGILL] = "SIGILL", +}; + +static void print_and_longjmp(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx, TF=%ld\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP], + (unsigned long)ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_TF); + + sig_eflags = (unsigned long)ctx->uc_mcontext.gregs[REG_EFL]; + siglongjmp(jmpbuf, 1); +} + static void check_result(void) { unsigned long new_eflags = get_eflags(); @@ -109,6 +140,22 @@ static void check_result(void) sig_traps = 0; } +static void fast_syscall_no_tf(void) +{ + sig_traps = 0; + printf("[RUN]\tFast syscall with TF cleared\n"); + fflush(stdout); /* Force a syscall */ + if (get_eflags() & X86_EFLAGS_TF) { + printf("[FAIL]\tTF is now set\n"); + exit(1); + } + if (sig_traps) { + printf("[FAIL]\tGot SIGTRAP\n"); + exit(1); + } + printf("[OK]\tNothing unexpected happened\n"); +} + int main() { #ifdef CAN_BUILD_32 @@ -163,17 +210,46 @@ int main() check_result(); /* Now make 
sure that another fast syscall doesn't set TF again. */ - printf("[RUN]\tFast syscall with TF cleared\n"); - fflush(stdout); /* Force a syscall */ - if (get_eflags() & X86_EFLAGS_TF) { - printf("[FAIL]\tTF is now set\n"); - exit(1); + fast_syscall_no_tf(); + + /* + * And do a forced SYSENTER to make sure that this works even if + * fast syscalls don't use SYSENTER. + * + * Invoking SYSENTER directly breaks all the rules. Just handle + * the SIGSEGV. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + unsigned long nr = SYS_getpid; + printf("[RUN]\tSet TF and check SYSENTER\n"); + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + sethandler(SIGSEGV, print_and_longjmp, + SA_RESETHAND | SA_ONSTACK); + sethandler(SIGILL, print_and_longjmp, SA_RESETHAND); + set_eflags(get_eflags() | X86_EFLAGS_TF); + /* Clear EBP first to make sure we segfault cleanly. */ + asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + + /* We're unreachable here. SYSENTER forgets RIP. */ } - if (sig_traps) { - printf("[FAIL]\tGot SIGTRAP\n"); + clearhandler(SIGSEGV); + clearhandler(SIGILL); + if (!(sig_eflags & X86_EFLAGS_TF)) { + printf("[FAIL]\tTF was cleared\n"); exit(1); } - printf("[OK]\tNothing unexpected happened\n"); + + /* Now make sure that another fast syscall doesn't set TF again. 
*/ + fast_syscall_no_tf(); return 0; } diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c new file mode 100644 index 000000000000..d6b09cb1aa2c --- /dev/null +++ b/tools/testing/selftests/x86/syscall_numbering.c @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * Copyright (c) 2018 Andrew Lutomirski + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <errno.h> +#include <unistd.h> +#include <syscall.h> + +static int nerrs; + +#define X32_BIT 0x40000000UL + +static void check_enosys(unsigned long nr, bool *ok) +{ + /* If this fails, a segfault is reasonably likely. */ + fflush(stdout); + + long ret = syscall(nr, 0, 0, 0, 0, 0, 0); + if (ret == 0) { + printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr); + *ok = false; + } else if (errno != ENOSYS) { + printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno); + *ok = false; + } +} + +static void test_x32_without_x32_bit(void) +{ + bool ok = true; + + /* + * Syscalls 512-547 are "x32" syscalls. They are intended to be + * called with the x32 (0x40000000) bit set. Calling them without + * the x32 bit set is nonsense and should not work. + */ + printf("[RUN]\tChecking syscalls 512-547\n"); + for (int i = 512; i <= 547; i++) + check_enosys(i, &ok); + + /* + * Check that a handful of 64-bit-only syscalls are rejected if the x32 + * bit is set. + */ + printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n"); + check_enosys(16 | X32_BIT, &ok); /* ioctl */ + check_enosys(19 | X32_BIT, &ok); /* readv */ + check_enosys(20 | X32_BIT, &ok); /* writev */ + + /* + * Check some syscalls with high bits set. 
+ */ + printf("[RUN]\tChecking numbers above 2^32-1\n"); + check_enosys((1UL << 32), &ok); + check_enosys(X32_BIT | (1UL << 32), &ok); + + if (!ok) + nerrs++; + else + printf("[OK]\tThey all returned -ENOSYS\n"); +} + +int main() +{ + /* + * Anyone diagnosing a failure will want to know whether the kernel + * supports x32. Tell them. + */ + printf("\tChecking for x32..."); + fflush(stdout); + if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) { + printf(" supported\n"); + } else if (errno == ENOSYS) { + printf(" not supported\n"); + } else { + printf(" confused\n"); + } + + test_x32_without_x32_bit(); + + return nerrs ? 1 : 0; +} |